35#include "llvm/IR/IntrinsicsAArch64.h"
36#include "llvm/IR/IntrinsicsAMDGPU.h"
37#include "llvm/IR/IntrinsicsARM.h"
38#include "llvm/IR/IntrinsicsNVPTX.h"
39#include "llvm/IR/IntrinsicsRISCV.h"
40#include "llvm/IR/IntrinsicsWebAssembly.h"
41#include "llvm/IR/IntrinsicsX86.h"
64 cl::desc(
"Disable autoupgrade of debug info"));
83 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
98 Type *LastArgType =
F->getFunctionType()->getParamType(
99 F->getFunctionType()->getNumParams() - 1);
114 if (
F->getReturnType()->isVectorTy())
127 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
128 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
145 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
146 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
160 if (
F->getReturnType()->getScalarType()->isBFloatTy())
170 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
184 if (Name.consume_front(
"avx."))
185 return (Name.starts_with(
"blend.p") ||
186 Name ==
"cvt.ps2.pd.256" ||
187 Name ==
"cvtdq2.pd.256" ||
188 Name ==
"cvtdq2.ps.256" ||
189 Name.starts_with(
"movnt.") ||
190 Name.starts_with(
"sqrt.p") ||
191 Name.starts_with(
"storeu.") ||
192 Name.starts_with(
"vbroadcast.s") ||
193 Name.starts_with(
"vbroadcastf128") ||
194 Name.starts_with(
"vextractf128.") ||
195 Name.starts_with(
"vinsertf128.") ||
196 Name.starts_with(
"vperm2f128.") ||
197 Name.starts_with(
"vpermil."));
199 if (Name.consume_front(
"avx2."))
200 return (Name ==
"movntdqa" ||
201 Name.starts_with(
"pabs.") ||
202 Name.starts_with(
"padds.") ||
203 Name.starts_with(
"paddus.") ||
204 Name.starts_with(
"pblendd.") ||
206 Name.starts_with(
"pbroadcast") ||
207 Name.starts_with(
"pcmpeq.") ||
208 Name.starts_with(
"pcmpgt.") ||
209 Name.starts_with(
"pmax") ||
210 Name.starts_with(
"pmin") ||
211 Name.starts_with(
"pmovsx") ||
212 Name.starts_with(
"pmovzx") ||
214 Name ==
"pmulu.dq" ||
215 Name.starts_with(
"psll.dq") ||
216 Name.starts_with(
"psrl.dq") ||
217 Name.starts_with(
"psubs.") ||
218 Name.starts_with(
"psubus.") ||
219 Name.starts_with(
"vbroadcast") ||
220 Name ==
"vbroadcasti128" ||
221 Name ==
"vextracti128" ||
222 Name ==
"vinserti128" ||
223 Name ==
"vperm2i128");
225 if (Name.consume_front(
"avx512.")) {
226 if (Name.consume_front(
"mask."))
228 return (Name.starts_with(
"add.p") ||
229 Name.starts_with(
"and.") ||
230 Name.starts_with(
"andn.") ||
231 Name.starts_with(
"broadcast.s") ||
232 Name.starts_with(
"broadcastf32x4.") ||
233 Name.starts_with(
"broadcastf32x8.") ||
234 Name.starts_with(
"broadcastf64x2.") ||
235 Name.starts_with(
"broadcastf64x4.") ||
236 Name.starts_with(
"broadcasti32x4.") ||
237 Name.starts_with(
"broadcasti32x8.") ||
238 Name.starts_with(
"broadcasti64x2.") ||
239 Name.starts_with(
"broadcasti64x4.") ||
240 Name.starts_with(
"cmp.b") ||
241 Name.starts_with(
"cmp.d") ||
242 Name.starts_with(
"cmp.q") ||
243 Name.starts_with(
"cmp.w") ||
244 Name.starts_with(
"compress.b") ||
245 Name.starts_with(
"compress.d") ||
246 Name.starts_with(
"compress.p") ||
247 Name.starts_with(
"compress.q") ||
248 Name.starts_with(
"compress.store.") ||
249 Name.starts_with(
"compress.w") ||
250 Name.starts_with(
"conflict.") ||
251 Name.starts_with(
"cvtdq2pd.") ||
252 Name.starts_with(
"cvtdq2ps.") ||
253 Name ==
"cvtpd2dq.256" ||
254 Name ==
"cvtpd2ps.256" ||
255 Name ==
"cvtps2pd.128" ||
256 Name ==
"cvtps2pd.256" ||
257 Name.starts_with(
"cvtqq2pd.") ||
258 Name ==
"cvtqq2ps.256" ||
259 Name ==
"cvtqq2ps.512" ||
260 Name ==
"cvttpd2dq.256" ||
261 Name ==
"cvttps2dq.128" ||
262 Name ==
"cvttps2dq.256" ||
263 Name.starts_with(
"cvtudq2pd.") ||
264 Name.starts_with(
"cvtudq2ps.") ||
265 Name.starts_with(
"cvtuqq2pd.") ||
266 Name ==
"cvtuqq2ps.256" ||
267 Name ==
"cvtuqq2ps.512" ||
268 Name.starts_with(
"dbpsadbw.") ||
269 Name.starts_with(
"div.p") ||
270 Name.starts_with(
"expand.b") ||
271 Name.starts_with(
"expand.d") ||
272 Name.starts_with(
"expand.load.") ||
273 Name.starts_with(
"expand.p") ||
274 Name.starts_with(
"expand.q") ||
275 Name.starts_with(
"expand.w") ||
276 Name.starts_with(
"fpclass.p") ||
277 Name.starts_with(
"insert") ||
278 Name.starts_with(
"load.") ||
279 Name.starts_with(
"loadu.") ||
280 Name.starts_with(
"lzcnt.") ||
281 Name.starts_with(
"max.p") ||
282 Name.starts_with(
"min.p") ||
283 Name.starts_with(
"movddup") ||
284 Name.starts_with(
"move.s") ||
285 Name.starts_with(
"movshdup") ||
286 Name.starts_with(
"movsldup") ||
287 Name.starts_with(
"mul.p") ||
288 Name.starts_with(
"or.") ||
289 Name.starts_with(
"pabs.") ||
290 Name.starts_with(
"packssdw.") ||
291 Name.starts_with(
"packsswb.") ||
292 Name.starts_with(
"packusdw.") ||
293 Name.starts_with(
"packuswb.") ||
294 Name.starts_with(
"padd.") ||
295 Name.starts_with(
"padds.") ||
296 Name.starts_with(
"paddus.") ||
297 Name.starts_with(
"palignr.") ||
298 Name.starts_with(
"pand.") ||
299 Name.starts_with(
"pandn.") ||
300 Name.starts_with(
"pavg") ||
301 Name.starts_with(
"pbroadcast") ||
302 Name.starts_with(
"pcmpeq.") ||
303 Name.starts_with(
"pcmpgt.") ||
304 Name.starts_with(
"perm.df.") ||
305 Name.starts_with(
"perm.di.") ||
306 Name.starts_with(
"permvar.") ||
307 Name.starts_with(
"pmaddubs.w.") ||
308 Name.starts_with(
"pmaddw.d.") ||
309 Name.starts_with(
"pmax") ||
310 Name.starts_with(
"pmin") ||
311 Name ==
"pmov.qd.256" ||
312 Name ==
"pmov.qd.512" ||
313 Name ==
"pmov.wb.256" ||
314 Name ==
"pmov.wb.512" ||
315 Name.starts_with(
"pmovsx") ||
316 Name.starts_with(
"pmovzx") ||
317 Name.starts_with(
"pmul.dq.") ||
318 Name.starts_with(
"pmul.hr.sw.") ||
319 Name.starts_with(
"pmulh.w.") ||
320 Name.starts_with(
"pmulhu.w.") ||
321 Name.starts_with(
"pmull.") ||
322 Name.starts_with(
"pmultishift.qb.") ||
323 Name.starts_with(
"pmulu.dq.") ||
324 Name.starts_with(
"por.") ||
325 Name.starts_with(
"prol.") ||
326 Name.starts_with(
"prolv.") ||
327 Name.starts_with(
"pror.") ||
328 Name.starts_with(
"prorv.") ||
329 Name.starts_with(
"pshuf.b.") ||
330 Name.starts_with(
"pshuf.d.") ||
331 Name.starts_with(
"pshufh.w.") ||
332 Name.starts_with(
"pshufl.w.") ||
333 Name.starts_with(
"psll.d") ||
334 Name.starts_with(
"psll.q") ||
335 Name.starts_with(
"psll.w") ||
336 Name.starts_with(
"pslli") ||
337 Name.starts_with(
"psllv") ||
338 Name.starts_with(
"psra.d") ||
339 Name.starts_with(
"psra.q") ||
340 Name.starts_with(
"psra.w") ||
341 Name.starts_with(
"psrai") ||
342 Name.starts_with(
"psrav") ||
343 Name.starts_with(
"psrl.d") ||
344 Name.starts_with(
"psrl.q") ||
345 Name.starts_with(
"psrl.w") ||
346 Name.starts_with(
"psrli") ||
347 Name.starts_with(
"psrlv") ||
348 Name.starts_with(
"psub.") ||
349 Name.starts_with(
"psubs.") ||
350 Name.starts_with(
"psubus.") ||
351 Name.starts_with(
"pternlog.") ||
352 Name.starts_with(
"punpckh") ||
353 Name.starts_with(
"punpckl") ||
354 Name.starts_with(
"pxor.") ||
355 Name.starts_with(
"shuf.f") ||
356 Name.starts_with(
"shuf.i") ||
357 Name.starts_with(
"shuf.p") ||
358 Name.starts_with(
"sqrt.p") ||
359 Name.starts_with(
"store.b.") ||
360 Name.starts_with(
"store.d.") ||
361 Name.starts_with(
"store.p") ||
362 Name.starts_with(
"store.q.") ||
363 Name.starts_with(
"store.w.") ||
364 Name ==
"store.ss" ||
365 Name.starts_with(
"storeu.") ||
366 Name.starts_with(
"sub.p") ||
367 Name.starts_with(
"ucmp.") ||
368 Name.starts_with(
"unpckh.") ||
369 Name.starts_with(
"unpckl.") ||
370 Name.starts_with(
"valign.") ||
371 Name ==
"vcvtph2ps.128" ||
372 Name ==
"vcvtph2ps.256" ||
373 Name.starts_with(
"vextract") ||
374 Name.starts_with(
"vfmadd.") ||
375 Name.starts_with(
"vfmaddsub.") ||
376 Name.starts_with(
"vfnmadd.") ||
377 Name.starts_with(
"vfnmsub.") ||
378 Name.starts_with(
"vpdpbusd.") ||
379 Name.starts_with(
"vpdpbusds.") ||
380 Name.starts_with(
"vpdpwssd.") ||
381 Name.starts_with(
"vpdpwssds.") ||
382 Name.starts_with(
"vpermi2var.") ||
383 Name.starts_with(
"vpermil.p") ||
384 Name.starts_with(
"vpermilvar.") ||
385 Name.starts_with(
"vpermt2var.") ||
386 Name.starts_with(
"vpmadd52") ||
387 Name.starts_with(
"vpshld.") ||
388 Name.starts_with(
"vpshldv.") ||
389 Name.starts_with(
"vpshrd.") ||
390 Name.starts_with(
"vpshrdv.") ||
391 Name.starts_with(
"vpshufbitqmb.") ||
392 Name.starts_with(
"xor."));
394 if (Name.consume_front(
"mask3."))
396 return (Name.starts_with(
"vfmadd.") ||
397 Name.starts_with(
"vfmaddsub.") ||
398 Name.starts_with(
"vfmsub.") ||
399 Name.starts_with(
"vfmsubadd.") ||
400 Name.starts_with(
"vfnmsub."));
402 if (Name.consume_front(
"maskz."))
404 return (Name.starts_with(
"pternlog.") ||
405 Name.starts_with(
"vfmadd.") ||
406 Name.starts_with(
"vfmaddsub.") ||
407 Name.starts_with(
"vpdpbusd.") ||
408 Name.starts_with(
"vpdpbusds.") ||
409 Name.starts_with(
"vpdpwssd.") ||
410 Name.starts_with(
"vpdpwssds.") ||
411 Name.starts_with(
"vpermt2var.") ||
412 Name.starts_with(
"vpmadd52") ||
413 Name.starts_with(
"vpshldv.") ||
414 Name.starts_with(
"vpshrdv."));
417 return (Name ==
"movntdqa" ||
418 Name ==
"pmul.dq.512" ||
419 Name ==
"pmulu.dq.512" ||
420 Name.starts_with(
"broadcastm") ||
421 Name.starts_with(
"cmp.p") ||
422 Name.starts_with(
"cvtb2mask.") ||
423 Name.starts_with(
"cvtd2mask.") ||
424 Name.starts_with(
"cvtmask2") ||
425 Name.starts_with(
"cvtq2mask.") ||
426 Name ==
"cvtusi2sd" ||
427 Name.starts_with(
"cvtw2mask.") ||
432 Name ==
"kortestc.w" ||
433 Name ==
"kortestz.w" ||
434 Name.starts_with(
"kunpck") ||
437 Name.starts_with(
"padds.") ||
438 Name.starts_with(
"pbroadcast") ||
439 Name.starts_with(
"prol") ||
440 Name.starts_with(
"pror") ||
441 Name.starts_with(
"psll.dq") ||
442 Name.starts_with(
"psrl.dq") ||
443 Name.starts_with(
"psubs.") ||
444 Name.starts_with(
"ptestm") ||
445 Name.starts_with(
"ptestnm") ||
446 Name.starts_with(
"storent.") ||
447 Name.starts_with(
"vbroadcast.s") ||
448 Name.starts_with(
"vpshld.") ||
449 Name.starts_with(
"vpshrd."));
452 if (Name.consume_front(
"fma."))
453 return (Name.starts_with(
"vfmadd.") ||
454 Name.starts_with(
"vfmsub.") ||
455 Name.starts_with(
"vfmsubadd.") ||
456 Name.starts_with(
"vfnmadd.") ||
457 Name.starts_with(
"vfnmsub."));
459 if (Name.consume_front(
"fma4."))
460 return Name.starts_with(
"vfmadd.s");
462 if (Name.consume_front(
"sse."))
463 return (Name ==
"add.ss" ||
464 Name ==
"cvtsi2ss" ||
465 Name ==
"cvtsi642ss" ||
468 Name.starts_with(
"sqrt.p") ||
470 Name.starts_with(
"storeu.") ||
473 if (Name.consume_front(
"sse2."))
474 return (Name ==
"add.sd" ||
475 Name ==
"cvtdq2pd" ||
476 Name ==
"cvtdq2ps" ||
477 Name ==
"cvtps2pd" ||
478 Name ==
"cvtsi2sd" ||
479 Name ==
"cvtsi642sd" ||
480 Name ==
"cvtss2sd" ||
483 Name.starts_with(
"padds.") ||
484 Name.starts_with(
"paddus.") ||
485 Name.starts_with(
"pcmpeq.") ||
486 Name.starts_with(
"pcmpgt.") ||
491 Name ==
"pmulu.dq" ||
492 Name.starts_with(
"pshuf") ||
493 Name.starts_with(
"psll.dq") ||
494 Name.starts_with(
"psrl.dq") ||
495 Name.starts_with(
"psubs.") ||
496 Name.starts_with(
"psubus.") ||
497 Name.starts_with(
"sqrt.p") ||
499 Name ==
"storel.dq" ||
500 Name.starts_with(
"storeu.") ||
503 if (Name.consume_front(
"sse41."))
504 return (Name.starts_with(
"blendp") ||
505 Name ==
"movntdqa" ||
515 Name.starts_with(
"pmovsx") ||
516 Name.starts_with(
"pmovzx") ||
519 if (Name.consume_front(
"sse42."))
520 return Name ==
"crc32.64.8";
522 if (Name.consume_front(
"sse4a."))
523 return Name.starts_with(
"movnt.");
525 if (Name.consume_front(
"ssse3."))
526 return (Name ==
"pabs.b.128" ||
527 Name ==
"pabs.d.128" ||
528 Name ==
"pabs.w.128");
530 if (Name.consume_front(
"xop."))
531 return (Name ==
"vpcmov" ||
532 Name ==
"vpcmov.256" ||
533 Name.starts_with(
"vpcom") ||
534 Name.starts_with(
"vprot"));
536 if (Name.consume_front(
"bmi."))
537 return (Name.starts_with(
"pdep.") ||
538 Name.starts_with(
"pext."));
540 return (Name ==
"addcarry.u32" ||
541 Name ==
"addcarry.u64" ||
542 Name ==
"addcarryx.u32" ||
543 Name ==
"addcarryx.u64" ||
544 Name ==
"subborrow.u32" ||
545 Name ==
"subborrow.u64" ||
546 Name.starts_with(
"vcvtph2ps."));
552 if (!Name.consume_front(
"x86."))
560 if (Name ==
"rdtscp") {
562 if (
F->getFunctionType()->getNumParams() == 0)
567 Intrinsic::x86_rdtscp);
574 if (Name.consume_front(
"sse41.ptest")) {
576 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
577 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
578 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
591 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
592 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
593 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
594 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
595 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
596 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
601 if (Name.consume_front(
"avx512.")) {
602 if (Name.consume_front(
"mask.cmp.")) {
605 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
606 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
607 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
608 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
609 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
610 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
614 }
else if (Name.starts_with(
"vpdpbusd.") ||
615 Name.starts_with(
"vpdpbusds.")) {
618 .
Case(
"vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
619 .
Case(
"vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
620 .
Case(
"vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
621 .
Case(
"vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
622 .
Case(
"vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
623 .
Case(
"vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
627 }
else if (Name.starts_with(
"vpdpwssd.") ||
628 Name.starts_with(
"vpdpwssds.")) {
631 .
Case(
"vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
632 .
Case(
"vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
633 .
Case(
"vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
634 .
Case(
"vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
635 .
Case(
"vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
636 .
Case(
"vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
644 if (Name.consume_front(
"avx2.")) {
645 if (Name.consume_front(
"vpdpb")) {
648 .
Case(
"ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
649 .
Case(
"ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
650 .
Case(
"ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
651 .
Case(
"ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
652 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
653 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
654 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
655 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
656 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
657 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
658 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
659 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
663 }
else if (Name.consume_front(
"vpdpw")) {
666 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
667 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
668 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
669 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
670 .
Case(
"usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
671 .
Case(
"usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
672 .
Case(
"usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
673 .
Case(
"usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
674 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
675 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
676 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
677 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
685 if (Name.consume_front(
"avx10.")) {
686 if (Name.consume_front(
"vpdpb")) {
689 .
Case(
"ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
690 .
Case(
"ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
691 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
692 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
693 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
694 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
698 }
else if (Name.consume_front(
"vpdpw")) {
700 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
701 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
702 .
Case(
"usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
703 .
Case(
"usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
704 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
705 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
713 if (Name.consume_front(
"avx512bf16.")) {
716 .
Case(
"cvtne2ps2bf16.128",
717 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
718 .
Case(
"cvtne2ps2bf16.256",
719 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
720 .
Case(
"cvtne2ps2bf16.512",
721 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
722 .
Case(
"mask.cvtneps2bf16.128",
723 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
724 .
Case(
"cvtneps2bf16.256",
725 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
726 .
Case(
"cvtneps2bf16.512",
727 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
734 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
735 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
736 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
743 if (Name.consume_front(
"xop.")) {
745 if (Name.starts_with(
"vpermil2")) {
748 auto Idx =
F->getFunctionType()->getParamType(2);
749 if (Idx->isFPOrFPVectorTy()) {
750 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
751 unsigned EltSize = Idx->getScalarSizeInBits();
752 if (EltSize == 64 && IdxSize == 128)
753 ID = Intrinsic::x86_xop_vpermil2pd;
754 else if (EltSize == 32 && IdxSize == 128)
755 ID = Intrinsic::x86_xop_vpermil2ps;
756 else if (EltSize == 64 && IdxSize == 256)
757 ID = Intrinsic::x86_xop_vpermil2pd_256;
759 ID = Intrinsic::x86_xop_vpermil2ps_256;
761 }
else if (
F->arg_size() == 2)
764 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
765 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
776 if (Name ==
"seh.recoverfp") {
778 Intrinsic::eh_recoverfp);
790 if (Name.starts_with(
"rbit")) {
793 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
797 if (Name ==
"thread.pointer") {
800 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
804 bool Neon = Name.consume_front(
"neon.");
809 if (Name.consume_front(
"bfdot.")) {
813 .
Cases({
"v2f32.v8i8",
"v4f32.v16i8"},
818 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
819 assert((OperandWidth == 64 || OperandWidth == 128) &&
820 "Unexpected operand width");
822 std::array<Type *, 2> Tys{
833 if (Name.consume_front(
"bfm")) {
835 if (Name.consume_back(
".v4f32.v16i8")) {
881 F->arg_begin()->getType());
885 if (Name.consume_front(
"vst")) {
887 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
891 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
892 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
895 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
896 Intrinsic::arm_neon_vst4lane};
898 auto fArgs =
F->getFunctionType()->params();
899 Type *Tys[] = {fArgs[0], fArgs[1]};
902 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
905 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
914 if (Name.consume_front(
"mve.")) {
916 if (Name ==
"vctp64") {
926 if (Name.starts_with(
"vrintn.v")) {
928 F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
933 if (Name.consume_back(
".v4i1")) {
935 if (Name.consume_back(
".predicated.v2i64.v4i32"))
937 return Name ==
"mull.int" || Name ==
"vqdmull";
939 if (Name.consume_back(
".v2i64")) {
941 bool IsGather = Name.consume_front(
"vldr.gather.");
942 if (IsGather || Name.consume_front(
"vstr.scatter.")) {
943 if (Name.consume_front(
"base.")) {
945 Name.consume_front(
"wb.");
948 return Name ==
"predicated.v2i64";
951 if (Name.consume_front(
"offset.predicated."))
952 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
953 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
966 if (Name.consume_front(
"cde.vcx")) {
968 if (Name.consume_back(
".predicated.v2i64.v4i1"))
970 return Name ==
"1q" || Name ==
"1qa" || Name ==
"2q" || Name ==
"2qa" ||
971 Name ==
"3q" || Name ==
"3qa";
985 F->arg_begin()->getType());
989 if (Name.starts_with(
"addp")) {
991 if (
F->arg_size() != 2)
994 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
996 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
1002 if (Name.starts_with(
"bfcvt")) {
1008 if (Name ==
"vcvtfp2hf" || Name ==
"vcvthf2fp") {
1015 if (Name.consume_front(
"sve.")) {
1017 if (Name.consume_front(
"bf")) {
1018 if (Name ==
"mmla") {
1019 Type *Tys[] = {
F->getReturnType(),
1020 std::next(
F->arg_begin())->getType()};
1022 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1025 if (Name.consume_back(
".lane")) {
1029 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1030 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1031 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1043 if (Name ==
"fcvt.bf16f32" || Name ==
"fcvtnt.bf16f32") {
1048 if (Name.consume_front(
"addqv")) {
1050 if (!
F->getReturnType()->isFPOrFPVectorTy())
1053 auto Args =
F->getFunctionType()->params();
1054 Type *Tys[] = {
F->getReturnType(), Args[1]};
1056 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1060 if (Name.consume_front(
"ld")) {
1062 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
1063 if (LdRegex.
match(Name)) {
1069 "Expected 2 arguments for ld* intrinsic.");
1070 Type *PtrTy =
F->getArg(1)->getType();
1073 Intrinsic::aarch64_sve_ld2_sret,
1074 Intrinsic::aarch64_sve_ld3_sret,
1075 Intrinsic::aarch64_sve_ld4_sret,
1078 F->getParent(), LoadIDs[Name[0] -
'2'], {Ty, PtrTy});
1084 if (Name.consume_front(
"tuple.")) {
1086 if (Name.starts_with(
"get")) {
1088 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
1090 F->getParent(), Intrinsic::vector_extract, Tys);
1094 if (Name.starts_with(
"set")) {
1096 auto Args =
F->getFunctionType()->params();
1097 Type *Tys[] = {Args[0], Args[2], Args[1]};
1099 F->getParent(), Intrinsic::vector_insert, Tys);
1103 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
1104 if (CreateTupleRegex.
match(Name)) {
1106 auto Args =
F->getFunctionType()->params();
1107 Type *Tys[] = {
F->getReturnType(), Args[1]};
1109 F->getParent(), Intrinsic::vector_insert, Tys);
1115 if (Name.starts_with(
"rev.nxv")) {
1118 F->getParent(), Intrinsic::vector_reverse,
F->getReturnType());
1124 if (Name.consume_front(
"sme.")) {
1126 if (Name.consume_front(
"ftmopa.")) {
1131 .
Case(
"za16.nxv16i8", Intrinsic::aarch64_sme_fp8_ftmopa_za16)
1132 .
Case(
"za32.nxv16i8", Intrinsic::aarch64_sme_fp8_ftmopa_za32)
1149 if (Name.consume_front(
"cp.async.bulk.tensor.g2s.")) {
1153 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1155 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1157 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1158 .
Case(
"tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1159 .
Case(
"tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1160 .
Case(
"tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1161 .
Case(
"tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1162 .
Case(
"tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1171 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1185 size_t FlagStartIndex =
F->getFunctionType()->getNumParams() - 3;
1186 Type *ArgType =
F->getFunctionType()->getParamType(FlagStartIndex);
1196 if (Name.consume_front(
"mapa.shared.cluster"))
1197 if (
F->getReturnType()->getPointerAddressSpace() ==
1199 return Intrinsic::nvvm_mapa_shared_cluster;
1201 if (Name.consume_front(
"cp.async.bulk.")) {
1204 .
Case(
"global.to.shared.cluster",
1205 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1206 .
Case(
"shared.cta.to.cluster",
1207 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1211 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1220 if (Name.consume_front(
"fma.rn."))
1222 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
1223 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1224 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1225 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1228 if (Name.consume_front(
"fmax."))
1230 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
1231 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1232 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1233 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1234 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1235 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1236 .
Case(
"ftz.nan.xorsign.abs.bf16",
1237 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1238 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1239 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1240 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1241 .
Case(
"ftz.xorsign.abs.bf16x2",
1242 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1243 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1244 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1245 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1246 .
Case(
"nan.xorsign.abs.bf16x2",
1247 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1248 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1249 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1252 if (Name.consume_front(
"fmin."))
1254 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
1255 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1256 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1257 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1258 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1259 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1260 .
Case(
"ftz.nan.xorsign.abs.bf16",
1261 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1262 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1263 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1264 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1265 .
Case(
"ftz.xorsign.abs.bf16x2",
1266 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1267 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1268 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1269 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1270 .
Case(
"nan.xorsign.abs.bf16x2",
1271 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1272 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1273 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1276 if (Name.consume_front(
"neg."))
1278 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1279 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1286 return Name.consume_front(
"local") || Name.consume_front(
"shared") ||
1287 Name.consume_front(
"global") || Name.consume_front(
"constant") ||
1288 Name.consume_front(
"param");
1294 if (Name.starts_with(
"to.fp16")) {
1298 FuncTy->getReturnType());
1301 if (Name.starts_with(
"from.fp16")) {
1305 FuncTy->getReturnType());
1312 bool CanUpgradeDebugIntrinsicsToRecords) {
1313 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1318 if (!Name.consume_front(
"llvm.") || Name.empty())
1324 bool IsArm = Name.consume_front(
"arm.");
1325 if (IsArm || Name.consume_front(
"aarch64.")) {
1331 if (Name.consume_front(
"amdgcn.")) {
1332 if (Name ==
"alignbit") {
1335 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1339 if (Name.consume_front(
"atomic.")) {
1340 if (Name.starts_with(
"inc") || Name.starts_with(
"dec") ||
1341 Name.starts_with(
"cond.sub") || Name.starts_with(
"csub")) {
1350 switch (
F->getIntrinsicID()) {
1354 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1355 if (
F->arg_size() == 7) {
1360 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1361 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1362 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1363 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1364 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1365 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1366 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1367 if (
F->arg_size() == 8) {
1374 if (Name.consume_front(
"ds.") || Name.consume_front(
"global.atomic.") ||
1375 Name.consume_front(
"flat.atomic.")) {
1376 if (Name.starts_with(
"fadd") ||
1378 (Name.starts_with(
"fmin") && !Name.starts_with(
"fmin.num")) ||
1379 (Name.starts_with(
"fmax") && !Name.starts_with(
"fmax.num"))) {
1387 if (Name.starts_with(
"ldexp.")) {
1390 F->getParent(), Intrinsic::ldexp,
1391 {F->getReturnType(), F->getArg(1)->getType()});
1400 if (
F->arg_size() == 1) {
1401 if (Name.consume_front(
"convert.")) {
1415 F->arg_begin()->getType());
1420 if (
F->arg_size() == 2 && Name ==
"coro.end") {
1423 Intrinsic::coro_end);
1430 if (Name.consume_front(
"dbg.")) {
1432 if (CanUpgradeDebugIntrinsicsToRecords) {
1433 if (Name ==
"addr" || Name ==
"value" || Name ==
"assign" ||
1434 Name ==
"declare" || Name ==
"label") {
1443 if (Name ==
"addr" || (Name ==
"value" &&
F->arg_size() == 4)) {
1446 Intrinsic::dbg_value);
1453 if (Name.consume_front(
"experimental.vector.")) {
1459 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1460 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1461 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1462 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1463 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1465 Intrinsic::vector_partial_reduce_add)
1468 const auto *FT =
F->getFunctionType();
1470 if (
ID == Intrinsic::vector_extract ||
1471 ID == Intrinsic::vector_interleave2)
1474 if (
ID != Intrinsic::vector_interleave2)
1476 if (
ID == Intrinsic::vector_insert ||
1477 ID == Intrinsic::vector_partial_reduce_add)
1485 if (Name.consume_front(
"reduce.")) {
1487 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1488 if (R.match(Name, &
Groups))
1490 .
Case(
"add", Intrinsic::vector_reduce_add)
1491 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1492 .
Case(
"and", Intrinsic::vector_reduce_and)
1493 .
Case(
"or", Intrinsic::vector_reduce_or)
1494 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1495 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1496 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1497 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1498 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1499 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1500 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1505 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1510 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1511 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1516 auto Args =
F->getFunctionType()->params();
1518 {Args[V2 ? 1 : 0]});
1524 if (Name.consume_front(
"splice"))
1528 if (Name.consume_front(
"experimental.stepvector.")) {
1532 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1537 if (Name.starts_with(
"flt.rounds")) {
1540 Intrinsic::get_rounding);
1545 if (Name.starts_with(
"invariant.group.barrier")) {
1547 auto Args =
F->getFunctionType()->params();
1548 Type* ObjectPtr[1] = {Args[0]};
1551 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1556 if ((Name.starts_with(
"lifetime.start") ||
1557 Name.starts_with(
"lifetime.end")) &&
1558 F->arg_size() == 2) {
1560 ? Intrinsic::lifetime_start
1561 : Intrinsic::lifetime_end;
1566 F->getArg(1)->getType());
1575 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1576 .StartsWith(
"memmove.", Intrinsic::memmove)
1578 if (
F->arg_size() == 5) {
1582 F->getFunctionType()->params().slice(0, 3);
1588 if (Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1591 const auto *FT =
F->getFunctionType();
1592 Type *ParamTypes[2] = {
1593 FT->getParamType(0),
1597 Intrinsic::memset, ParamTypes);
1603 .
StartsWith(
"masked.load", Intrinsic::masked_load)
1604 .
StartsWith(
"masked.gather", Intrinsic::masked_gather)
1605 .
StartsWith(
"masked.store", Intrinsic::masked_store)
1606 .
StartsWith(
"masked.scatter", Intrinsic::masked_scatter)
1608 if (MaskedID &&
F->arg_size() == 4) {
1610 if (MaskedID == Intrinsic::masked_load ||
1611 MaskedID == Intrinsic::masked_gather) {
1613 F->getParent(), MaskedID,
1614 {F->getReturnType(), F->getArg(0)->getType()});
1618 F->getParent(), MaskedID,
1619 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1625 if (Name.consume_front(
"nvvm.")) {
1627 if (
F->arg_size() == 1) {
1630 .
Cases({
"brev32",
"brev64"}, Intrinsic::bitreverse)
1631 .Case(
"clz.i", Intrinsic::ctlz)
1632 .
Case(
"popc.i", Intrinsic::ctpop)
1636 {F->getReturnType()});
1639 }
else if (
F->arg_size() == 2) {
1642 .
Cases({
"max.s",
"max.i",
"max.ll"}, Intrinsic::smax)
1643 .Cases({
"min.s",
"min.i",
"min.ll"}, Intrinsic::smin)
1644 .Cases({
"max.us",
"max.ui",
"max.ull"}, Intrinsic::umax)
1645 .Cases({
"min.us",
"min.ui",
"min.ull"}, Intrinsic::umin)
1649 {F->getReturnType()});
1655 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1683 bool Expand =
false;
1684 if (Name.consume_front(
"abs."))
1687 Name ==
"i" || Name ==
"ll" || Name ==
"bf16" || Name ==
"bf16x2";
1688 else if (Name.consume_front(
"fabs."))
1690 Expand = Name ==
"f" || Name ==
"ftz.f" || Name ==
"d";
1691 else if (Name.consume_front(
"ex2.approx."))
1694 Name ==
"f" || Name ==
"ftz.f" || Name ==
"d" || Name ==
"f16x2";
1695 else if (Name.consume_front(
"atomic.load."))
1704 else if (Name.consume_front(
"atomic."))
1719 else if (Name.consume_front(
"bitcast."))
1722 Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" || Name ==
"d2ll";
1723 else if (Name.consume_front(
"rotate."))
1725 Expand = Name ==
"b32" || Name ==
"b64" || Name ==
"right.b64";
1726 else if (Name.consume_front(
"ptr.gen.to."))
1729 else if (Name.consume_front(
"ptr."))
1732 else if (Name.consume_front(
"ldg.global."))
1734 Expand = (Name.starts_with(
"i.") || Name.starts_with(
"f.") ||
1735 Name.starts_with(
"p."));
1738 .
Case(
"barrier0",
true)
1739 .
Case(
"barrier.n",
true)
1740 .
Case(
"barrier.sync.cnt",
true)
1741 .
Case(
"barrier.sync",
true)
1742 .
Case(
"barrier",
true)
1743 .
Case(
"bar.sync",
true)
1744 .
Case(
"barrier0.popc",
true)
1745 .
Case(
"barrier0.and",
true)
1746 .
Case(
"barrier0.or",
true)
1747 .
Case(
"clz.ll",
true)
1748 .
Case(
"popc.ll",
true)
1750 .
Case(
"swap.lo.hi.b64",
true)
1751 .
Case(
"tanh.approx.f32",
true)
1763 if (Name.starts_with(
"objectsize.")) {
1764 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1765 if (
F->arg_size() == 2 ||
F->arg_size() == 3) {
1768 Intrinsic::objectsize, Tys);
1775 if (Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1778 F->getParent(), Intrinsic::ptr_annotation,
1779 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1785 if (Name.consume_front(
"riscv.")) {
1788 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1789 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1790 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1791 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1794 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1807 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1808 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1817 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1818 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1819 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1820 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1825 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1834 if (Name ==
"clmul.i32" || Name ==
"clmul.i64") {
1836 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1845 if (Name ==
"stackprotectorcheck") {
1852 if (Name ==
"thread.pointer") {
1854 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
1860 if (Name ==
"var.annotation" &&
F->arg_size() == 4) {
1863 F->getParent(), Intrinsic::var_annotation,
1864 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1867 if (Name.consume_front(
"vector.splice")) {
1868 if (Name.starts_with(
".left") || Name.starts_with(
".right"))
1876 if (Name.consume_front(
"wasm.")) {
1879 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1880 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1881 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1886 F->getReturnType());
1890 if (Name.consume_front(
"dot.i8x16.i7x16.")) {
1892 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1894 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1913 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1923 std::string
Name =
F->getName().str();
1926 Name,
F->getParent());
1937 if (Result != std::nullopt) {
1950 bool CanUpgradeDebugIntrinsicsToRecords) {
1970 GV->
getName() ==
"llvm.global_dtors")) ||
1985 unsigned N =
Init->getNumOperands();
1986 std::vector<Constant *> NewCtors(
N);
1987 for (
unsigned i = 0; i !=
N; ++i) {
1990 Ctor->getAggregateElement(1),
2004 unsigned NumElts = ResultTy->getNumElements() * 8;
2008 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
2018 for (
unsigned l = 0; l != NumElts; l += 16)
2019 for (
unsigned i = 0; i != 16; ++i) {
2020 unsigned Idx = NumElts + i - Shift;
2022 Idx -= NumElts - 16;
2023 Idxs[l + i] = Idx + l;
2026 Res = Builder.CreateShuffleVector(Res,
Op,
ArrayRef(Idxs, NumElts));
2030 return Builder.CreateBitCast(Res, ResultTy,
"cast");
2038 unsigned NumElts = ResultTy->getNumElements() * 8;
2042 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
2052 for (
unsigned l = 0; l != NumElts; l += 16)
2053 for (
unsigned i = 0; i != 16; ++i) {
2054 unsigned Idx = i + Shift;
2056 Idx += NumElts - 16;
2057 Idxs[l + i] = Idx + l;
2060 Res = Builder.CreateShuffleVector(
Op, Res,
ArrayRef(Idxs, NumElts));
2064 return Builder.CreateBitCast(Res, ResultTy,
"cast");
2072 Mask = Builder.CreateBitCast(Mask, MaskTy);
2078 for (
unsigned i = 0; i != NumElts; ++i)
2080 Mask = Builder.CreateShuffleVector(Mask, Mask,
ArrayRef(Indices, NumElts),
2091 if (
C->isAllOnesValue())
2096 return Builder.CreateSelect(Mask, Op0, Op1);
2103 if (
C->isAllOnesValue())
2107 Mask->getType()->getIntegerBitWidth());
2108 Mask = Builder.CreateBitCast(Mask, MaskTy);
2109 Mask = Builder.CreateExtractElement(Mask, (
uint64_t)0);
2110 return Builder.CreateSelect(Mask, Op0, Op1);
2123 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
2124 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
2129 ShiftVal &= (NumElts - 1);
2138 if (ShiftVal > 16) {
2146 for (
unsigned l = 0; l < NumElts; l += 16) {
2147 for (
unsigned i = 0; i != 16; ++i) {
2148 unsigned Idx = ShiftVal + i;
2149 if (!IsVALIGN && Idx >= 16)
2150 Idx += NumElts - 16;
2151 Indices[l + i] = Idx + l;
2156 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
2162 bool ZeroMask,
bool IndexForm) {
2165 unsigned EltWidth = Ty->getScalarSizeInBits();
2166 bool IsFloat = Ty->isFPOrFPVectorTy();
2168 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2169 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2170 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2171 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2172 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2173 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2174 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2175 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2176 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2177 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2178 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2179 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2180 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2181 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2182 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2183 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2184 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2185 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2186 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2187 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2188 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2189 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2190 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2191 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2192 else if (VecWidth == 128 && EltWidth == 16)
2193 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2194 else if (VecWidth == 256 && EltWidth == 16)
2195 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2196 else if (VecWidth == 512 && EltWidth == 16)
2197 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2198 else if (VecWidth == 128 && EltWidth == 8)
2199 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2200 else if (VecWidth == 256 && EltWidth == 8)
2201 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2202 else if (VecWidth == 512 && EltWidth == 8)
2203 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2214 Value *V = Builder.CreateIntrinsic(IID, Args);
2226 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2237 bool IsRotateRight) {
2247 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2248 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2251 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2252 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2297 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2302 bool IsShiftRight,
bool ZeroMask) {
2316 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2317 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2320 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2321 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2336 const Align Alignment =
2338 ?
Align(
Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2343 if (
C->isAllOnesValue())
2344 return Builder.CreateAlignedStore(
Data, Ptr, Alignment);
2349 return Builder.CreateMaskedStore(
Data, Ptr, Alignment, Mask);
2355 const Align Alignment =
2364 if (
C->isAllOnesValue())
2365 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2370 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2376 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2377 {Op0, Builder.getInt1(
false)});
2392 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2393 LHS = Builder.CreateShl(
LHS, ShiftAmt);
2394 LHS = Builder.CreateAShr(
LHS, ShiftAmt);
2395 RHS = Builder.CreateShl(
RHS, ShiftAmt);
2396 RHS = Builder.CreateAShr(
RHS, ShiftAmt);
2399 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2400 LHS = Builder.CreateAnd(
LHS, Mask);
2401 RHS = Builder.CreateAnd(
RHS, Mask);
2418 if (!
C || !
C->isAllOnesValue())
2419 Vec = Builder.CreateAnd(Vec,
getX86MaskVec(Builder, Mask, NumElts));
2424 for (
unsigned i = 0; i != NumElts; ++i)
2426 for (
unsigned i = NumElts; i != 8; ++i)
2427 Indices[i] = NumElts + i % NumElts;
2428 Vec = Builder.CreateShuffleVector(Vec,
2432 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2436 unsigned CC,
bool Signed) {
2444 }
else if (CC == 7) {
2480 Value* AndNode = Builder.CreateAnd(Mask,
APInt(8, 1));
2481 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2483 Value* Extract2 = Builder.CreateExtractElement(Src, (
uint64_t)0);
2484 Value*
Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2493 return Builder.CreateSExt(Mask, ReturnOp,
"vpmovm2");
2499 Name = Name.substr(12);
2504 if (Name.starts_with(
"max.p")) {
2505 if (VecWidth == 128 && EltWidth == 32)
2506 IID = Intrinsic::x86_sse_max_ps;
2507 else if (VecWidth == 128 && EltWidth == 64)
2508 IID = Intrinsic::x86_sse2_max_pd;
2509 else if (VecWidth == 256 && EltWidth == 32)
2510 IID = Intrinsic::x86_avx_max_ps_256;
2511 else if (VecWidth == 256 && EltWidth == 64)
2512 IID = Intrinsic::x86_avx_max_pd_256;
2515 }
else if (Name.starts_with(
"min.p")) {
2516 if (VecWidth == 128 && EltWidth == 32)
2517 IID = Intrinsic::x86_sse_min_ps;
2518 else if (VecWidth == 128 && EltWidth == 64)
2519 IID = Intrinsic::x86_sse2_min_pd;
2520 else if (VecWidth == 256 && EltWidth == 32)
2521 IID = Intrinsic::x86_avx_min_ps_256;
2522 else if (VecWidth == 256 && EltWidth == 64)
2523 IID = Intrinsic::x86_avx_min_pd_256;
2526 }
else if (Name.starts_with(
"pshuf.b.")) {
2527 if (VecWidth == 128)
2528 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2529 else if (VecWidth == 256)
2530 IID = Intrinsic::x86_avx2_pshuf_b;
2531 else if (VecWidth == 512)
2532 IID = Intrinsic::x86_avx512_pshuf_b_512;
2535 }
else if (Name.starts_with(
"pmul.hr.sw.")) {
2536 if (VecWidth == 128)
2537 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2538 else if (VecWidth == 256)
2539 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2540 else if (VecWidth == 512)
2541 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2544 }
else if (Name.starts_with(
"pmulh.w.")) {
2545 if (VecWidth == 128)
2546 IID = Intrinsic::x86_sse2_pmulh_w;
2547 else if (VecWidth == 256)
2548 IID = Intrinsic::x86_avx2_pmulh_w;
2549 else if (VecWidth == 512)
2550 IID = Intrinsic::x86_avx512_pmulh_w_512;
2553 }
else if (Name.starts_with(
"pmulhu.w.")) {
2554 if (VecWidth == 128)
2555 IID = Intrinsic::x86_sse2_pmulhu_w;
2556 else if (VecWidth == 256)
2557 IID = Intrinsic::x86_avx2_pmulhu_w;
2558 else if (VecWidth == 512)
2559 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2562 }
else if (Name.starts_with(
"pmaddw.d.")) {
2563 if (VecWidth == 128)
2564 IID = Intrinsic::x86_sse2_pmadd_wd;
2565 else if (VecWidth == 256)
2566 IID = Intrinsic::x86_avx2_pmadd_wd;
2567 else if (VecWidth == 512)
2568 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2571 }
else if (Name.starts_with(
"pmaddubs.w.")) {
2572 if (VecWidth == 128)
2573 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2574 else if (VecWidth == 256)
2575 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2576 else if (VecWidth == 512)
2577 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2580 }
else if (Name.starts_with(
"packsswb.")) {
2581 if (VecWidth == 128)
2582 IID = Intrinsic::x86_sse2_packsswb_128;
2583 else if (VecWidth == 256)
2584 IID = Intrinsic::x86_avx2_packsswb;
2585 else if (VecWidth == 512)
2586 IID = Intrinsic::x86_avx512_packsswb_512;
2589 }
else if (Name.starts_with(
"packssdw.")) {
2590 if (VecWidth == 128)
2591 IID = Intrinsic::x86_sse2_packssdw_128;
2592 else if (VecWidth == 256)
2593 IID = Intrinsic::x86_avx2_packssdw;
2594 else if (VecWidth == 512)
2595 IID = Intrinsic::x86_avx512_packssdw_512;
2598 }
else if (Name.starts_with(
"packuswb.")) {
2599 if (VecWidth == 128)
2600 IID = Intrinsic::x86_sse2_packuswb_128;
2601 else if (VecWidth == 256)
2602 IID = Intrinsic::x86_avx2_packuswb;
2603 else if (VecWidth == 512)
2604 IID = Intrinsic::x86_avx512_packuswb_512;
2607 }
else if (Name.starts_with(
"packusdw.")) {
2608 if (VecWidth == 128)
2609 IID = Intrinsic::x86_sse41_packusdw;
2610 else if (VecWidth == 256)
2611 IID = Intrinsic::x86_avx2_packusdw;
2612 else if (VecWidth == 512)
2613 IID = Intrinsic::x86_avx512_packusdw_512;
2616 }
else if (Name.starts_with(
"vpermilvar.")) {
2617 if (VecWidth == 128 && EltWidth == 32)
2618 IID = Intrinsic::x86_avx_vpermilvar_ps;
2619 else if (VecWidth == 128 && EltWidth == 64)
2620 IID = Intrinsic::x86_avx_vpermilvar_pd;
2621 else if (VecWidth == 256 && EltWidth == 32)
2622 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2623 else if (VecWidth == 256 && EltWidth == 64)
2624 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2625 else if (VecWidth == 512 && EltWidth == 32)
2626 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2627 else if (VecWidth == 512 && EltWidth == 64)
2628 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2631 }
else if (Name ==
"cvtpd2dq.256") {
2632 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2633 }
else if (Name ==
"cvtpd2ps.256") {
2634 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2635 }
else if (Name ==
"cvttpd2dq.256") {
2636 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2637 }
else if (Name ==
"cvttps2dq.128") {
2638 IID = Intrinsic::x86_sse2_cvttps2dq;
2639 }
else if (Name ==
"cvttps2dq.256") {
2640 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2641 }
else if (Name.starts_with(
"permvar.")) {
2643 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2644 IID = Intrinsic::x86_avx2_permps;
2645 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2646 IID = Intrinsic::x86_avx2_permd;
2647 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2648 IID = Intrinsic::x86_avx512_permvar_df_256;
2649 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2650 IID = Intrinsic::x86_avx512_permvar_di_256;
2651 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2652 IID = Intrinsic::x86_avx512_permvar_sf_512;
2653 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2654 IID = Intrinsic::x86_avx512_permvar_si_512;
2655 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2656 IID = Intrinsic::x86_avx512_permvar_df_512;
2657 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2658 IID = Intrinsic::x86_avx512_permvar_di_512;
2659 else if (VecWidth == 128 && EltWidth == 16)
2660 IID = Intrinsic::x86_avx512_permvar_hi_128;
2661 else if (VecWidth == 256 && EltWidth == 16)
2662 IID = Intrinsic::x86_avx512_permvar_hi_256;
2663 else if (VecWidth == 512 && EltWidth == 16)
2664 IID = Intrinsic::x86_avx512_permvar_hi_512;
2665 else if (VecWidth == 128 && EltWidth == 8)
2666 IID = Intrinsic::x86_avx512_permvar_qi_128;
2667 else if (VecWidth == 256 && EltWidth == 8)
2668 IID = Intrinsic::x86_avx512_permvar_qi_256;
2669 else if (VecWidth == 512 && EltWidth == 8)
2670 IID = Intrinsic::x86_avx512_permvar_qi_512;
2673 }
else if (Name.starts_with(
"dbpsadbw.")) {
2674 if (VecWidth == 128)
2675 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2676 else if (VecWidth == 256)
2677 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2678 else if (VecWidth == 512)
2679 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2682 }
else if (Name.starts_with(
"pmultishift.qb.")) {
2683 if (VecWidth == 128)
2684 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2685 else if (VecWidth == 256)
2686 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2687 else if (VecWidth == 512)
2688 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2691 }
else if (Name.starts_with(
"conflict.")) {
2692 if (Name[9] ==
'd' && VecWidth == 128)
2693 IID = Intrinsic::x86_avx512_conflict_d_128;
2694 else if (Name[9] ==
'd' && VecWidth == 256)
2695 IID = Intrinsic::x86_avx512_conflict_d_256;
2696 else if (Name[9] ==
'd' && VecWidth == 512)
2697 IID = Intrinsic::x86_avx512_conflict_d_512;
2698 else if (Name[9] ==
'q' && VecWidth == 128)
2699 IID = Intrinsic::x86_avx512_conflict_q_128;
2700 else if (Name[9] ==
'q' && VecWidth == 256)
2701 IID = Intrinsic::x86_avx512_conflict_q_256;
2702 else if (Name[9] ==
'q' && VecWidth == 512)
2703 IID = Intrinsic::x86_avx512_conflict_q_512;
2706 }
else if (Name.starts_with(
"pavg.")) {
2707 if (Name[5] ==
'b' && VecWidth == 128)
2708 IID = Intrinsic::x86_sse2_pavg_b;
2709 else if (Name[5] ==
'b' && VecWidth == 256)
2710 IID = Intrinsic::x86_avx2_pavg_b;
2711 else if (Name[5] ==
'b' && VecWidth == 512)
2712 IID = Intrinsic::x86_avx512_pavg_b_512;
2713 else if (Name[5] ==
'w' && VecWidth == 128)
2714 IID = Intrinsic::x86_sse2_pavg_w;
2715 else if (Name[5] ==
'w' && VecWidth == 256)
2716 IID = Intrinsic::x86_avx2_pavg_w;
2717 else if (Name[5] ==
'w' && VecWidth == 512)
2718 IID = Intrinsic::x86_avx512_pavg_w_512;
2727 Rep = Builder.CreateIntrinsic(IID, Args);
2738 if (AsmStr->find(
"mov\tfp") == 0 &&
2739 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2740 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2741 AsmStr->replace(Pos, 1,
";");
2747 Value *Rep =
nullptr;
2749 if (Name ==
"abs.i" || Name ==
"abs.ll") {
2751 Rep = Builder.CreateIntrinsic(Intrinsic::abs, {Arg->
getType()},
2752 {Arg, Builder.getTrue()},
2754 }
else if (Name ==
"abs.bf16" || Name ==
"abs.bf16x2") {
2755 Type *Ty = (Name ==
"abs.bf16")
2759 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2760 Rep = Builder.CreateBitCast(Abs, CI->
getType());
2761 }
else if (Name ==
"fabs.f" || Name ==
"fabs.ftz.f" || Name ==
"fabs.d") {
2762 Intrinsic::ID IID = (Name ==
"fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2763 : Intrinsic::nvvm_fabs;
2764 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2765 }
else if (Name.consume_front(
"ex2.approx.")) {
2767 Intrinsic::ID IID = Name.starts_with(
"ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2768 : Intrinsic::nvvm_ex2_approx;
2769 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2770 }
else if (Name.starts_with(
"atomic.load.add.f32.p") ||
2771 Name.starts_with(
"atomic.load.add.f64.p")) {
2774 Rep = Builder.CreateAtomicRMW(
2780 }
else if (Name.starts_with(
"atomic.load.inc.32.p") ||
2781 Name.starts_with(
"atomic.load.dec.32.p")) {
2786 Rep = Builder.CreateAtomicRMW(
2790 }
else if (Name.starts_with(
"atomic.") && Name.contains(
".gen.")) {
2796 Op.contains(
".cta.") ?
"block" :
"");
2797 if (
Op.starts_with(
"cas.")) {
2799 Value *Pair = Builder.CreateAtomicCmpXchg(
2802 Rep = Builder.CreateExtractValue(Pair, 0);
2820 "unexpected nvvm scoped atomic intrinsic");
2821 Rep = Builder.CreateAtomicRMW(BinOp, Ptr, Val,
MaybeAlign(),
2824 }
else if (Name ==
"clz.ll") {
2827 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->
getType()},
2828 {Arg, Builder.getFalse()},
2830 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(),
"ctlz.trunc");
2831 }
else if (Name ==
"popc.ll") {
2835 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->
getType()},
2836 Arg,
nullptr,
"ctpop");
2837 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(),
"ctpop.trunc");
2838 }
else if (Name ==
"h2f") {
2840 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
2841 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2842 }
else if (Name.consume_front(
"bitcast.") &&
2843 (Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" ||
2846 }
else if (Name ==
"rotate.b32") {
2849 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2850 {Arg, Arg, ShiftAmt});
2851 }
else if (Name ==
"rotate.b64") {
2855 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2856 {Arg, Arg, ZExtShiftAmt});
2857 }
else if (Name ==
"rotate.right.b64") {
2861 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2862 {Arg, Arg, ZExtShiftAmt});
2863 }
else if (Name ==
"swap.lo.hi.b64") {
2866 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2867 {Arg, Arg, Builder.getInt64(32)});
2868 }
else if ((Name.consume_front(
"ptr.gen.to.") &&
2871 Name.starts_with(
".to.gen"))) {
2873 }
else if (Name.consume_front(
"ldg.global")) {
2877 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2880 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2882 }
else if (Name ==
"tanh.approx.f32") {
2886 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->
getArgOperand(0),
2888 }
else if (Name ==
"barrier0" || Name ==
"barrier.n" || Name ==
"bar.sync") {
2890 Name.ends_with(
'0') ? Builder.getInt32(0) : CI->
getArgOperand(0);
2891 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2893 }
else if (Name ==
"barrier") {
2894 Rep = Builder.CreateIntrinsic(
2895 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2897 }
else if (Name ==
"barrier.sync") {
2898 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2900 }
else if (Name ==
"barrier.sync.cnt") {
2901 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2903 }
else if (Name ==
"barrier0.popc" || Name ==
"barrier0.and" ||
2904 Name ==
"barrier0.or") {
2906 C = Builder.CreateICmpNE(
C, Builder.getInt32(0));
2910 .
Case(
"barrier0.popc",
2911 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2912 .
Case(
"barrier0.and",
2913 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2914 .
Case(
"barrier0.or",
2915 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2916 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0),
C});
2917 Rep = Builder.CreateZExt(Bar, CI->
getType());
2921 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2931 ? Builder.CreateBitCast(Arg, NewType)
2934 Rep = Builder.CreateCall(NewFn, Args);
2935 if (
F->getReturnType()->isIntegerTy())
2936 Rep = Builder.CreateBitCast(Rep,
F->getReturnType());
2946 Value *Rep =
nullptr;
2948 if (Name.starts_with(
"sse4a.movnt.")) {
2960 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2963 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2964 }
else if (Name.starts_with(
"avx.movnt.") ||
2965 Name.starts_with(
"avx512.storent.")) {
2977 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2978 }
else if (Name ==
"sse2.storel.dq") {
2983 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2984 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2985 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2986 }
else if (Name.starts_with(
"sse.storeu.") ||
2987 Name.starts_with(
"sse2.storeu.") ||
2988 Name.starts_with(
"avx.storeu.")) {
2991 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
2992 }
else if (Name ==
"avx512.mask.store.ss") {
2996 }
else if (Name.starts_with(
"avx512.mask.store")) {
2998 bool Aligned = Name[17] !=
'u';
3001 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
3004 bool CmpEq = Name[9] ==
'e';
3007 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
3008 }
else if (Name.starts_with(
"avx512.broadcastm")) {
3015 Rep = Builder.CreateVectorSplat(NumElts, Rep);
3016 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
3018 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
3019 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
3020 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
3021 }
else if (Name.starts_with(
"avx.sqrt.p") ||
3022 Name.starts_with(
"sse2.sqrt.p") ||
3023 Name.starts_with(
"sse.sqrt.p")) {
3024 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
3025 {CI->getArgOperand(0)});
3026 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
3030 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
3031 : Intrinsic::x86_avx512_sqrt_pd_512;
3034 Rep = Builder.CreateIntrinsic(IID, Args);
3036 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
3037 {CI->getArgOperand(0)});
3041 }
else if (Name.starts_with(
"avx512.ptestm") ||
3042 Name.starts_with(
"avx512.ptestnm")) {
3046 Rep = Builder.CreateAnd(Op0, Op1);
3052 Rep = Builder.CreateICmp(Pred, Rep, Zero);
3054 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
3057 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
3060 }
else if (Name.starts_with(
"avx512.kunpck")) {
3065 for (
unsigned i = 0; i != NumElts; ++i)
3074 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
3075 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3076 }
else if (Name ==
"avx512.kand.w") {
3079 Rep = Builder.CreateAnd(
LHS,
RHS);
3080 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3081 }
else if (Name ==
"avx512.kandn.w") {
3084 LHS = Builder.CreateNot(
LHS);
3085 Rep = Builder.CreateAnd(
LHS,
RHS);
3086 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3087 }
else if (Name ==
"avx512.kor.w") {
3090 Rep = Builder.CreateOr(
LHS,
RHS);
3091 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3092 }
else if (Name ==
"avx512.kxor.w") {
3095 Rep = Builder.CreateXor(
LHS,
RHS);
3096 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3097 }
else if (Name ==
"avx512.kxnor.w") {
3100 LHS = Builder.CreateNot(
LHS);
3101 Rep = Builder.CreateXor(
LHS,
RHS);
3102 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3103 }
else if (Name ==
"avx512.knot.w") {
3105 Rep = Builder.CreateNot(Rep);
3106 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3107 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
3110 Rep = Builder.CreateOr(
LHS,
RHS);
3111 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3113 if (Name[14] ==
'c')
3117 Rep = Builder.CreateICmpEQ(Rep,
C);
3118 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3119 }
else if (Name ==
"sse.add.ss" || Name ==
"sse2.add.sd" ||
3120 Name ==
"sse.sub.ss" || Name ==
"sse2.sub.sd" ||
3121 Name ==
"sse.mul.ss" || Name ==
"sse2.mul.sd" ||
3122 Name ==
"sse.div.ss" || Name ==
"sse2.div.sd") {
3125 ConstantInt::get(I32Ty, 0));
3127 ConstantInt::get(I32Ty, 0));
3129 if (Name.contains(
".add."))
3130 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3131 else if (Name.contains(
".sub."))
3132 EltOp = Builder.CreateFSub(Elt0, Elt1);
3133 else if (Name.contains(
".mul."))
3134 EltOp = Builder.CreateFMul(Elt0, Elt1);
3136 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3137 Rep = Builder.CreateInsertElement(CI->
getArgOperand(0), EltOp,
3138 ConstantInt::get(I32Ty, 0));
3139 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
3141 bool CmpEq = Name[16] ==
'e';
3143 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
3152 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3155 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3158 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3165 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
3170 if (VecWidth == 128 && EltWidth == 32)
3171 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3172 else if (VecWidth == 256 && EltWidth == 32)
3173 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3174 else if (VecWidth == 512 && EltWidth == 32)
3175 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3176 else if (VecWidth == 128 && EltWidth == 64)
3177 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3178 else if (VecWidth == 256 && EltWidth == 64)
3179 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3180 else if (VecWidth == 512 && EltWidth == 64)
3181 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3188 }
else if (Name.starts_with(
"avx512.cmp.p")) {
3190 Type *OpTy = Args[0]->getType();
3194 if (VecWidth == 128 && EltWidth == 32)
3195 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3196 else if (VecWidth == 256 && EltWidth == 32)
3197 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3198 else if (VecWidth == 512 && EltWidth == 32)
3199 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3200 else if (VecWidth == 128 && EltWidth == 64)
3201 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3202 else if (VecWidth == 256 && EltWidth == 64)
3203 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3204 else if (VecWidth == 512 && EltWidth == 64)
3205 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3210 if (VecWidth == 512)
3212 Args.push_back(Mask);
3214 Rep = Builder.CreateIntrinsic(IID, Args);
3215 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
3219 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
3222 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
3223 Name.starts_with(
"avx512.cvtw2mask.") ||
3224 Name.starts_with(
"avx512.cvtd2mask.") ||
3225 Name.starts_with(
"avx512.cvtq2mask.")) {
3230 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
3231 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
3232 Name.starts_with(
"avx512.mask.pabs")) {
3234 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
3235 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
3236 Name.starts_with(
"avx512.mask.pmaxs")) {
3238 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
3239 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
3240 Name.starts_with(
"avx512.mask.pmaxu")) {
3242 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
3243 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
3244 Name.starts_with(
"avx512.mask.pmins")) {
3246 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
3247 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
3248 Name.starts_with(
"avx512.mask.pminu")) {
3250 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
3251 Name ==
"avx512.pmulu.dq.512" ||
3252 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
3254 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
3255 Name ==
"avx512.pmul.dq.512" ||
3256 Name.starts_with(
"avx512.mask.pmul.dq.")) {
3258 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
3259 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
3264 }
else if (Name ==
"avx512.cvtusi2sd") {
3269 }
else if (Name ==
"sse2.cvtss2sd") {
3271 Rep = Builder.CreateFPExt(
3274 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
3275 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
3276 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
3277 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
3278 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
3279 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
3280 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
3281 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
3282 Name ==
"avx512.mask.cvtqq2ps.256" ||
3283 Name ==
"avx512.mask.cvtqq2ps.512" ||
3284 Name ==
"avx512.mask.cvtuqq2ps.256" ||
3285 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
3286 Name ==
"avx.cvt.ps2.pd.256" ||
3287 Name ==
"avx512.mask.cvtps2pd.128" ||
3288 Name ==
"avx512.mask.cvtps2pd.256") {
3293 unsigned NumDstElts = DstTy->getNumElements();
3295 assert(NumDstElts == 2 &&
"Unexpected vector size");
3296 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
3299 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3300 bool IsUnsigned = Name.contains(
"cvtu");
3302 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
3306 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3307 : Intrinsic::x86_avx512_sitofp_round;
3308 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3311 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
3312 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3318 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3319 Name.starts_with(
"vcvtph2ps.")) {
3323 unsigned NumDstElts = DstTy->getNumElements();
3324 if (NumDstElts != SrcTy->getNumElements()) {
3325 assert(NumDstElts == 4 &&
"Unexpected vector size");
3326 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3328 Rep = Builder.CreateBitCast(
3330 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3334 }
else if (Name.starts_with(
"avx512.mask.load")) {
3336 bool Aligned = Name[16] !=
'u';
3339 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3343 ResultTy->getNumElements());
3344 Rep = Builder.CreateIntrinsic(
3345 Intrinsic::masked_expandload, {ResultTy, PtrTy},
3347 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3353 Rep = Builder.CreateIntrinsic(
3354 Intrinsic::masked_compressstore, {ResultTy, PtrTy},
3356 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3357 Name.starts_with(
"avx512.mask.expand.")) {
3361 ResultTy->getNumElements());
3363 bool IsCompress = Name[12] ==
'c';
3364 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3365 : Intrinsic::x86_avx512_mask_expand;
3366 Rep = Builder.CreateIntrinsic(
3368 }
else if (Name.starts_with(
"xop.vpcom")) {
3370 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3371 Name.ends_with(
"uq"))
3373 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3374 Name.ends_with(
"d") || Name.ends_with(
"q"))
3383 Name = Name.substr(9);
3384 if (Name.starts_with(
"lt"))
3386 else if (Name.starts_with(
"le"))
3388 else if (Name.starts_with(
"gt"))
3390 else if (Name.starts_with(
"ge"))
3392 else if (Name.starts_with(
"eq"))
3394 else if (Name.starts_with(
"ne"))
3396 else if (Name.starts_with(
"false"))
3398 else if (Name.starts_with(
"true"))
3405 }
else if (Name.starts_with(
"xop.vpcmov")) {
3407 Value *NotSel = Builder.CreateNot(Sel);
3410 Rep = Builder.CreateOr(Sel0, Sel1);
3411 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3412 Name.starts_with(
"avx512.mask.prol")) {
3414 }
else if (Name.starts_with(
"avx512.pror") ||
3415 Name.starts_with(
"avx512.mask.pror")) {
3417 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3418 Name.starts_with(
"avx512.mask.vpshld") ||
3419 Name.starts_with(
"avx512.maskz.vpshld")) {
3420 bool ZeroMask = Name[11] ==
'z';
3422 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3423 Name.starts_with(
"avx512.mask.vpshrd") ||
3424 Name.starts_with(
"avx512.maskz.vpshrd")) {
3425 bool ZeroMask = Name[11] ==
'z';
3427 }
else if (Name ==
"sse42.crc32.64.8") {
3430 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3432 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3433 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3434 Name.starts_with(
"avx512.vbroadcast.s")) {
3437 Type *EltTy = VecTy->getElementType();
3438 unsigned EltNum = VecTy->getNumElements();
3442 for (
unsigned I = 0;
I < EltNum; ++
I)
3443 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3444 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3445 Name.starts_with(
"sse41.pmovzx") ||
3446 Name.starts_with(
"avx2.pmovsx") ||
3447 Name.starts_with(
"avx2.pmovzx") ||
3448 Name.starts_with(
"avx512.mask.pmovsx") ||
3449 Name.starts_with(
"avx512.mask.pmovzx")) {
3451 unsigned NumDstElts = DstTy->getNumElements();
3455 for (
unsigned i = 0; i != NumDstElts; ++i)
3460 bool DoSext = Name.contains(
"pmovsx");
3462 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3467 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3468 Name ==
"avx512.mask.pmov.qd.512" ||
3469 Name ==
"avx512.mask.pmov.wb.256" ||
3470 Name ==
"avx512.mask.pmov.wb.512") {
3475 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3476 Name ==
"avx2.vbroadcasti128") {
3482 if (NumSrcElts == 2)
3483 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3485 Rep = Builder.CreateShuffleVector(Load,
3487 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3488 Name.starts_with(
"avx512.mask.shuf.f")) {
3493 unsigned ControlBitsMask = NumLanes - 1;
3494 unsigned NumControlBits = NumLanes / 2;
3497 for (
unsigned l = 0; l != NumLanes; ++l) {
3498 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3500 if (l >= NumLanes / 2)
3501 LaneMask += NumLanes;
3502 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3503 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3509 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3510 Name.starts_with(
"avx512.mask.broadcasti")) {
3513 unsigned NumDstElts =
3517 for (
unsigned i = 0; i != NumDstElts; ++i)
3518 ShuffleMask[i] = i % NumSrcElts;
3524 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3525 Name.starts_with(
"avx2.vbroadcast") ||
3526 Name.starts_with(
"avx512.pbroadcast") ||
3527 Name.starts_with(
"avx512.mask.broadcast.s")) {
3534 Rep = Builder.CreateShuffleVector(
Op, M);
3539 }
else if (Name.starts_with(
"sse2.padds.") ||
3540 Name.starts_with(
"avx2.padds.") ||
3541 Name.starts_with(
"avx512.padds.") ||
3542 Name.starts_with(
"avx512.mask.padds.")) {
3544 }
else if (Name.starts_with(
"sse2.psubs.") ||
3545 Name.starts_with(
"avx2.psubs.") ||
3546 Name.starts_with(
"avx512.psubs.") ||
3547 Name.starts_with(
"avx512.mask.psubs.")) {
3549 }
else if (Name.starts_with(
"sse2.paddus.") ||
3550 Name.starts_with(
"avx2.paddus.") ||
3551 Name.starts_with(
"avx512.mask.paddus.")) {
3553 }
else if (Name.starts_with(
"sse2.psubus.") ||
3554 Name.starts_with(
"avx2.psubus.") ||
3555 Name.starts_with(
"avx512.mask.psubus.")) {
3557 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3562 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3566 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3571 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3576 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3577 Name ==
"avx512.psll.dq.512") {
3581 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3582 Name ==
"avx512.psrl.dq.512") {
3586 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3587 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3588 Name.starts_with(
"avx2.pblendd.")) {
3593 unsigned NumElts = VecTy->getNumElements();
3596 for (
unsigned i = 0; i != NumElts; ++i)
3597 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3599 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3600 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3601 Name ==
"avx2.vinserti128" ||
3602 Name.starts_with(
"avx512.mask.insert")) {
3606 unsigned DstNumElts =
3608 unsigned SrcNumElts =
3610 unsigned Scale = DstNumElts / SrcNumElts;
3617 for (
unsigned i = 0; i != SrcNumElts; ++i)
3619 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3620 Idxs[i] = SrcNumElts;
3621 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3635 for (
unsigned i = 0; i != DstNumElts; ++i)
3638 for (
unsigned i = 0; i != SrcNumElts; ++i)
3639 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3640 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3646 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3647 Name ==
"avx2.vextracti128" ||
3648 Name.starts_with(
"avx512.mask.vextract")) {
3651 unsigned DstNumElts =
3653 unsigned SrcNumElts =
3655 unsigned Scale = SrcNumElts / DstNumElts;
3662 for (
unsigned i = 0; i != DstNumElts; ++i) {
3663 Idxs[i] = i + (Imm * DstNumElts);
3665 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3671 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3672 Name.starts_with(
"avx512.mask.perm.di.")) {
3676 unsigned NumElts = VecTy->getNumElements();
3679 for (
unsigned i = 0; i != NumElts; ++i)
3680 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3682 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3687 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3699 unsigned HalfSize = NumElts / 2;
3711 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3712 for (
unsigned i = 0; i < HalfSize; ++i)
3713 ShuffleMask[i] = StartIndex + i;
3716 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3717 for (
unsigned i = 0; i < HalfSize; ++i)
3718 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3720 Rep = Builder.CreateShuffleVector(V0,
V1, ShuffleMask);
3722 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3723 Name.starts_with(
"avx512.mask.vpermil.p") ||
3724 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3728 unsigned NumElts = VecTy->getNumElements();
3730 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3731 unsigned IdxMask = ((1 << IdxSize) - 1);
3737 for (
unsigned i = 0; i != NumElts; ++i)
3738 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3740 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3745 }
else if (Name ==
"sse2.pshufl.w" ||
3746 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3751 if (Name ==
"sse2.pshufl.w" && NumElts % 8 != 0)
3755 for (
unsigned l = 0; l != NumElts; l += 8) {
3756 for (
unsigned i = 0; i != 4; ++i)
3757 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3758 for (
unsigned i = 4; i != 8; ++i)
3759 Idxs[i + l] = i + l;
3762 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3767 }
else if (Name ==
"sse2.pshufh.w" ||
3768 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3773 if (Name ==
"sse2.pshufh.w" && NumElts % 8 != 0)
3777 for (
unsigned l = 0; l != NumElts; l += 8) {
3778 for (
unsigned i = 0; i != 4; ++i)
3779 Idxs[i + l] = i + l;
3780 for (
unsigned i = 0; i != 4; ++i)
3781 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3784 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3789 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3796 unsigned HalfLaneElts = NumLaneElts / 2;
3799 for (
unsigned i = 0; i != NumElts; ++i) {
3801 Idxs[i] = i - (i % NumLaneElts);
3803 if ((i % NumLaneElts) >= HalfLaneElts)
3807 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3810 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3814 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3815 Name.starts_with(
"avx512.mask.movshdup") ||
3816 Name.starts_with(
"avx512.mask.movsldup")) {
3822 if (Name.starts_with(
"avx512.mask.movshdup."))
3826 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3827 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3828 Idxs[i + l + 0] = i + l +
Offset;
3829 Idxs[i + l + 1] = i + l +
Offset;
3832 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3836 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3837 Name.starts_with(
"avx512.mask.unpckl.")) {
3844 for (
int l = 0; l != NumElts; l += NumLaneElts)
3845 for (
int i = 0; i != NumLaneElts; ++i)
3846 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3848 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3852 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3853 Name.starts_with(
"avx512.mask.unpckh.")) {
3860 for (
int l = 0; l != NumElts; l += NumLaneElts)
3861 for (
int i = 0; i != NumLaneElts; ++i)
3862 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3864 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3868 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3869 Name.starts_with(
"avx512.mask.pand.")) {
3872 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3874 Rep = Builder.CreateBitCast(Rep, FTy);
3877 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3878 Name.starts_with(
"avx512.mask.pandn.")) {
3881 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3882 Rep = Builder.CreateAnd(Rep,
3884 Rep = Builder.CreateBitCast(Rep, FTy);
3887 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3888 Name.starts_with(
"avx512.mask.por.")) {
3891 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3893 Rep = Builder.CreateBitCast(Rep, FTy);
3896 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3897 Name.starts_with(
"avx512.mask.pxor.")) {
3900 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3902 Rep = Builder.CreateBitCast(Rep, FTy);
3905 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3909 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3913 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3917 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3918 if (Name.ends_with(
".512")) {
3920 if (Name[17] ==
's')
3921 IID = Intrinsic::x86_avx512_add_ps_512;
3923 IID = Intrinsic::x86_avx512_add_pd_512;
3925 Rep = Builder.CreateIntrinsic(
3933 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3934 if (Name.ends_with(
".512")) {
3936 if (Name[17] ==
's')
3937 IID = Intrinsic::x86_avx512_div_ps_512;
3939 IID = Intrinsic::x86_avx512_div_pd_512;
3941 Rep = Builder.CreateIntrinsic(
3949 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3950 if (Name.ends_with(
".512")) {
3952 if (Name[17] ==
's')
3953 IID = Intrinsic::x86_avx512_mul_ps_512;
3955 IID = Intrinsic::x86_avx512_mul_pd_512;
3957 Rep = Builder.CreateIntrinsic(
3965 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3966 if (Name.ends_with(
".512")) {
3968 if (Name[17] ==
's')
3969 IID = Intrinsic::x86_avx512_sub_ps_512;
3971 IID = Intrinsic::x86_avx512_sub_pd_512;
3973 Rep = Builder.CreateIntrinsic(
3981 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3982 Name.starts_with(
"avx512.mask.min.p")) &&
3983 Name.drop_front(18) ==
".512") {
3984 bool IsDouble = Name[17] ==
'd';
3985 bool IsMin = Name[13] ==
'i';
3987 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3988 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3991 Rep = Builder.CreateIntrinsic(
3996 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
3998 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
3999 {CI->getArgOperand(0), Builder.getInt1(false)});
4002 }
else if (Name.starts_with(
"avx512.mask.psll")) {
4003 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4004 bool IsVariable = Name[16] ==
'v';
4005 char Size = Name[16] ==
'.' ? Name[17]
4006 : Name[17] ==
'.' ? Name[18]
4007 : Name[18] ==
'.' ? Name[19]
4011 if (IsVariable && Name[17] !=
'.') {
4012 if (
Size ==
'd' && Name[17] ==
'2')
4013 IID = Intrinsic::x86_avx2_psllv_q;
4014 else if (
Size ==
'd' && Name[17] ==
'4')
4015 IID = Intrinsic::x86_avx2_psllv_q_256;
4016 else if (
Size ==
's' && Name[17] ==
'4')
4017 IID = Intrinsic::x86_avx2_psllv_d;
4018 else if (
Size ==
's' && Name[17] ==
'8')
4019 IID = Intrinsic::x86_avx2_psllv_d_256;
4020 else if (
Size ==
'h' && Name[17] ==
'8')
4021 IID = Intrinsic::x86_avx512_psllv_w_128;
4022 else if (
Size ==
'h' && Name[17] ==
'1')
4023 IID = Intrinsic::x86_avx512_psllv_w_256;
4024 else if (Name[17] ==
'3' && Name[18] ==
'2')
4025 IID = Intrinsic::x86_avx512_psllv_w_512;
4028 }
else if (Name.ends_with(
".128")) {
4030 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
4031 : Intrinsic::x86_sse2_psll_d;
4032 else if (
Size ==
'q')
4033 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
4034 : Intrinsic::x86_sse2_psll_q;
4035 else if (
Size ==
'w')
4036 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
4037 : Intrinsic::x86_sse2_psll_w;
4040 }
else if (Name.ends_with(
".256")) {
4042 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
4043 : Intrinsic::x86_avx2_psll_d;
4044 else if (
Size ==
'q')
4045 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
4046 : Intrinsic::x86_avx2_psll_q;
4047 else if (
Size ==
'w')
4048 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
4049 : Intrinsic::x86_avx2_psll_w;
4054 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
4055 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
4056 : Intrinsic::x86_avx512_psll_d_512;
4057 else if (
Size ==
'q')
4058 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
4059 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
4060 : Intrinsic::x86_avx512_psll_q_512;
4061 else if (
Size ==
'w')
4062 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
4063 : Intrinsic::x86_avx512_psll_w_512;
4069 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
4070 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4071 bool IsVariable = Name[16] ==
'v';
4072 char Size = Name[16] ==
'.' ? Name[17]
4073 : Name[17] ==
'.' ? Name[18]
4074 : Name[18] ==
'.' ? Name[19]
4078 if (IsVariable && Name[17] !=
'.') {
4079 if (
Size ==
'd' && Name[17] ==
'2')
4080 IID = Intrinsic::x86_avx2_psrlv_q;
4081 else if (
Size ==
'd' && Name[17] ==
'4')
4082 IID = Intrinsic::x86_avx2_psrlv_q_256;
4083 else if (
Size ==
's' && Name[17] ==
'4')
4084 IID = Intrinsic::x86_avx2_psrlv_d;
4085 else if (
Size ==
's' && Name[17] ==
'8')
4086 IID = Intrinsic::x86_avx2_psrlv_d_256;
4087 else if (
Size ==
'h' && Name[17] ==
'8')
4088 IID = Intrinsic::x86_avx512_psrlv_w_128;
4089 else if (
Size ==
'h' && Name[17] ==
'1')
4090 IID = Intrinsic::x86_avx512_psrlv_w_256;
4091 else if (Name[17] ==
'3' && Name[18] ==
'2')
4092 IID = Intrinsic::x86_avx512_psrlv_w_512;
4095 }
else if (Name.ends_with(
".128")) {
4097 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4098 : Intrinsic::x86_sse2_psrl_d;
4099 else if (
Size ==
'q')
4100 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4101 : Intrinsic::x86_sse2_psrl_q;
4102 else if (
Size ==
'w')
4103 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4104 : Intrinsic::x86_sse2_psrl_w;
4107 }
else if (Name.ends_with(
".256")) {
4109 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4110 : Intrinsic::x86_avx2_psrl_d;
4111 else if (
Size ==
'q')
4112 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4113 : Intrinsic::x86_avx2_psrl_q;
4114 else if (
Size ==
'w')
4115 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4116 : Intrinsic::x86_avx2_psrl_w;
4121 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4122 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4123 : Intrinsic::x86_avx512_psrl_d_512;
4124 else if (
Size ==
'q')
4125 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4126 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4127 : Intrinsic::x86_avx512_psrl_q_512;
4128 else if (
Size ==
'w')
4129 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4130 : Intrinsic::x86_avx512_psrl_w_512;
4136 }
else if (Name.starts_with(
"avx512.mask.psra")) {
4137 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4138 bool IsVariable = Name[16] ==
'v';
4139 char Size = Name[16] ==
'.' ? Name[17]
4140 : Name[17] ==
'.' ? Name[18]
4141 : Name[18] ==
'.' ? Name[19]
4145 if (IsVariable && Name[17] !=
'.') {
4146 if (
Size ==
's' && Name[17] ==
'4')
4147 IID = Intrinsic::x86_avx2_psrav_d;
4148 else if (
Size ==
's' && Name[17] ==
'8')
4149 IID = Intrinsic::x86_avx2_psrav_d_256;
4150 else if (
Size ==
'h' && Name[17] ==
'8')
4151 IID = Intrinsic::x86_avx512_psrav_w_128;
4152 else if (
Size ==
'h' && Name[17] ==
'1')
4153 IID = Intrinsic::x86_avx512_psrav_w_256;
4154 else if (Name[17] ==
'3' && Name[18] ==
'2')
4155 IID = Intrinsic::x86_avx512_psrav_w_512;
4158 }
else if (Name.ends_with(
".128")) {
4160 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4161 : Intrinsic::x86_sse2_psra_d;
4162 else if (
Size ==
'q')
4163 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4164 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4165 : Intrinsic::x86_avx512_psra_q_128;
4166 else if (
Size ==
'w')
4167 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4168 : Intrinsic::x86_sse2_psra_w;
4171 }
else if (Name.ends_with(
".256")) {
4173 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4174 : Intrinsic::x86_avx2_psra_d;
4175 else if (
Size ==
'q')
4176 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4177 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4178 : Intrinsic::x86_avx512_psra_q_256;
4179 else if (
Size ==
'w')
4180 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4181 : Intrinsic::x86_avx2_psra_w;
4186 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4187 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4188 : Intrinsic::x86_avx512_psra_d_512;
4189 else if (
Size ==
'q')
4190 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4191 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4192 : Intrinsic::x86_avx512_psra_q_512;
4193 else if (
Size ==
'w')
4194 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4195 : Intrinsic::x86_avx512_psra_w_512;
4201 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
4203 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
4205 }
else if (Name.ends_with(
".movntdqa")) {
4209 LoadInst *LI = Builder.CreateAlignedLoad(
4214 }
else if (Name.starts_with(
"fma.vfmadd.") ||
4215 Name.starts_with(
"fma.vfmsub.") ||
4216 Name.starts_with(
"fma.vfnmadd.") ||
4217 Name.starts_with(
"fma.vfnmsub.")) {
4218 bool NegMul = Name[6] ==
'n';
4219 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
4220 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
4231 if (NegMul && !IsScalar)
4232 Ops[0] = Builder.CreateFNeg(
Ops[0]);
4233 if (NegMul && IsScalar)
4234 Ops[1] = Builder.CreateFNeg(
Ops[1]);
4236 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4238 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4242 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
4250 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4254 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
4255 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
4256 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
4257 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
4258 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
4259 bool IsMask3 = Name[11] ==
'3';
4260 bool IsMaskZ = Name[11] ==
'z';
4262 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4263 bool NegMul = Name[2] ==
'n';
4264 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4270 if (NegMul && (IsMask3 || IsMaskZ))
4271 A = Builder.CreateFNeg(
A);
4272 if (NegMul && !(IsMask3 || IsMaskZ))
4273 B = Builder.CreateFNeg(
B);
4275 C = Builder.CreateFNeg(
C);
4277 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
4278 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
4279 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
4286 if (Name.back() ==
'd')
4287 IID = Intrinsic::x86_avx512_vfmadd_f64;
4289 IID = Intrinsic::x86_avx512_vfmadd_f32;
4290 Rep = Builder.CreateIntrinsic(IID,
Ops);
4292 Rep = Builder.CreateFMA(
A,
B,
C);
4301 if (NegAcc && IsMask3)
4306 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
4308 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
4309 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
4310 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
4311 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
4312 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
4313 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
4314 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
4315 bool IsMask3 = Name[11] ==
'3';
4316 bool IsMaskZ = Name[11] ==
'z';
4318 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4319 bool NegMul = Name[2] ==
'n';
4320 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4326 if (NegMul && (IsMask3 || IsMaskZ))
4327 A = Builder.CreateFNeg(
A);
4328 if (NegMul && !(IsMask3 || IsMaskZ))
4329 B = Builder.CreateFNeg(
B);
4331 C = Builder.CreateFNeg(
C);
4338 if (Name[Name.size() - 5] ==
's')
4339 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4341 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4345 Rep = Builder.CreateFMA(
A,
B,
C);
4353 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4357 if (VecWidth == 128 && EltWidth == 32)
4358 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4359 else if (VecWidth == 256 && EltWidth == 32)
4360 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4361 else if (VecWidth == 128 && EltWidth == 64)
4362 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4363 else if (VecWidth == 256 && EltWidth == 64)
4364 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4370 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4371 Rep = Builder.CreateIntrinsic(IID,
Ops);
4372 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4373 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4374 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4375 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4376 bool IsMask3 = Name[11] ==
'3';
4377 bool IsMaskZ = Name[11] ==
'z';
4379 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4380 bool IsSubAdd = Name[3] ==
's';
4384 if (Name[Name.size() - 5] ==
's')
4385 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4387 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4392 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4394 Rep = Builder.CreateIntrinsic(IID,
Ops);
4403 Value *Odd = Builder.CreateCall(FMA,
Ops);
4404 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4405 Value *Even = Builder.CreateCall(FMA,
Ops);
4411 for (
int i = 0; i != NumElts; ++i)
4412 Idxs[i] = i + (i % 2) * NumElts;
4414 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4422 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4423 Name.starts_with(
"avx512.maskz.pternlog.")) {
4424 bool ZeroMask = Name[11] ==
'z';
4428 if (VecWidth == 128 && EltWidth == 32)
4429 IID = Intrinsic::x86_avx512_pternlog_d_128;
4430 else if (VecWidth == 256 && EltWidth == 32)
4431 IID = Intrinsic::x86_avx512_pternlog_d_256;
4432 else if (VecWidth == 512 && EltWidth == 32)
4433 IID = Intrinsic::x86_avx512_pternlog_d_512;
4434 else if (VecWidth == 128 && EltWidth == 64)
4435 IID = Intrinsic::x86_avx512_pternlog_q_128;
4436 else if (VecWidth == 256 && EltWidth == 64)
4437 IID = Intrinsic::x86_avx512_pternlog_q_256;
4438 else if (VecWidth == 512 && EltWidth == 64)
4439 IID = Intrinsic::x86_avx512_pternlog_q_512;
4445 Rep = Builder.CreateIntrinsic(IID, Args);
4449 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4450 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4451 bool ZeroMask = Name[11] ==
'z';
4452 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4455 if (VecWidth == 128 && !
High)
4456 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4457 else if (VecWidth == 256 && !
High)
4458 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4459 else if (VecWidth == 512 && !
High)
4460 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4461 else if (VecWidth == 128 &&
High)
4462 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4463 else if (VecWidth == 256 &&
High)
4464 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4465 else if (VecWidth == 512 &&
High)
4466 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4472 Rep = Builder.CreateIntrinsic(IID, Args);
4476 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4477 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4478 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4479 bool ZeroMask = Name[11] ==
'z';
4480 bool IndexForm = Name[17] ==
'i';
4482 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4483 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4484 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4485 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4486 bool ZeroMask = Name[11] ==
'z';
4487 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4490 if (VecWidth == 128 && !IsSaturating)
4491 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4492 else if (VecWidth == 256 && !IsSaturating)
4493 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4494 else if (VecWidth == 512 && !IsSaturating)
4495 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4496 else if (VecWidth == 128 && IsSaturating)
4497 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4498 else if (VecWidth == 256 && IsSaturating)
4499 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4500 else if (VecWidth == 512 && IsSaturating)
4501 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4511 if (Args[1]->
getType()->isVectorTy() &&
4514 ->isIntegerTy(32) &&
4515 Args[2]->
getType()->isVectorTy() &&
4518 ->isIntegerTy(32)) {
4519 Type *NewArgType =
nullptr;
4520 if (VecWidth == 128)
4522 else if (VecWidth == 256)
4524 else if (VecWidth == 512)
4530 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4531 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4534 Rep = Builder.CreateIntrinsic(IID, Args);
4538 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4539 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4540 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4541 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4542 bool ZeroMask = Name[11] ==
'z';
4543 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4546 if (VecWidth == 128 && !IsSaturating)
4547 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4548 else if (VecWidth == 256 && !IsSaturating)
4549 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4550 else if (VecWidth == 512 && !IsSaturating)
4551 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4552 else if (VecWidth == 128 && IsSaturating)
4553 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4554 else if (VecWidth == 256 && IsSaturating)
4555 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4556 else if (VecWidth == 512 && IsSaturating)
4557 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4567 if (Args[1]->
getType()->isVectorTy() &&
4570 ->isIntegerTy(32) &&
4571 Args[2]->
getType()->isVectorTy() &&
4574 ->isIntegerTy(32)) {
4575 Type *NewArgType =
nullptr;
4576 if (VecWidth == 128)
4578 else if (VecWidth == 256)
4580 else if (VecWidth == 512)
4586 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4587 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4590 Rep = Builder.CreateIntrinsic(IID, Args);
4594 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4595 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4596 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4598 if (Name[0] ==
'a' && Name.back() ==
'2')
4599 IID = Intrinsic::x86_addcarry_32;
4600 else if (Name[0] ==
'a' && Name.back() ==
'4')
4601 IID = Intrinsic::x86_addcarry_64;
4602 else if (Name[0] ==
's' && Name.back() ==
'2')
4603 IID = Intrinsic::x86_subborrow_32;
4604 else if (Name[0] ==
's' && Name.back() ==
'4')
4605 IID = Intrinsic::x86_subborrow_64;
4612 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4615 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4618 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4622 }
else if (Name.starts_with(
"avx512.mask.") &&
4625 }
else if (Name.starts_with(
"bmi.pdep.")) {
4627 }
else if (Name.starts_with(
"bmi.pext.")) {
4637 if (Name.starts_with(
"neon.bfcvt")) {
4638 if (Name.starts_with(
"neon.bfcvtn2")) {
4640 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4642 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4643 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4646 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4647 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4649 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4653 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4654 return Builder.CreateShuffleVector(
4657 return Builder.CreateFPTrunc(CI->
getOperand(0),
4660 }
else if (Name.starts_with(
"sve.fcvt")) {
4663 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4664 .
Case(
"sve.fcvtnt.bf16f32",
4665 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4677 if (Args[1]->
getType() != BadPredTy)
4680 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4681 BadPredTy, Args[1]);
4682 Args[1] = Builder.CreateIntrinsic(
4683 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4685 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4689 if (Name ==
"neon.vcvtfp2hf")
4690 return Builder.CreateBitCast(
4691 Builder.CreateFPTrunc(
4695 if (Name ==
"neon.vcvthf2fp")
4696 return Builder.CreateFPExt(
4697 Builder.CreateBitCast(
4707 if (Name ==
"mve.vctp64.old") {
4710 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4713 Value *C1 = Builder.CreateIntrinsic(
4714 Intrinsic::arm_mve_pred_v2i,
4716 return Builder.CreateIntrinsic(
4717 Intrinsic::arm_mve_pred_i2v,
4719 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4720 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4721 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4722 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4724 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4725 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4726 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4727 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4729 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4730 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4731 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4732 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4733 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4734 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4735 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4736 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4737 std::vector<Type *> Tys;
4741 case Intrinsic::arm_mve_mull_int_predicated:
4742 case Intrinsic::arm_mve_vqdmull_predicated:
4743 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4746 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4747 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4748 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4752 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4756 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4760 case Intrinsic::arm_cde_vcx1q_predicated:
4761 case Intrinsic::arm_cde_vcx1qa_predicated:
4762 case Intrinsic::arm_cde_vcx2q_predicated:
4763 case Intrinsic::arm_cde_vcx2qa_predicated:
4764 case Intrinsic::arm_cde_vcx3q_predicated:
4765 case Intrinsic::arm_cde_vcx3qa_predicated:
4772 std::vector<Value *>
Ops;
4774 Type *Ty =
Op->getType();
4775 if (Ty->getScalarSizeInBits() == 1) {
4776 Value *C1 = Builder.CreateIntrinsic(
4777 Intrinsic::arm_mve_pred_v2i,
4779 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4784 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4799 auto UpgradeLegacyWMMAIUIntrinsicCall =
4804 Args.push_back(Builder.getFalse());
4808 F->getParent(),
F->getIntrinsicID(), OverloadTys);
4815 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4820 NewCall->copyMetadata(*CI);
4824 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4825 assert(CI->
arg_size() == 7 &&
"Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4826 "intrinsic should have 7 arguments");
4829 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2});
4831 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4832 assert(CI->
arg_size() == 8 &&
"Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4833 "intrinsic should have 8 arguments");
4838 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2, T3, T4});
4841 switch (
F->getIntrinsicID()) {
4844 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4845 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4846 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4847 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4848 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4849 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4864 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4867 F->getParent(),
F->getIntrinsicID(), Overloads);
4872 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4877 NewCall->copyMetadata(*CI);
4878 NewCall->takeName(CI);
4900 if (NumOperands < 3)
4913 bool IsVolatile =
false;
4917 if (NumOperands > 3)
4922 if (NumOperands > 5) {
4924 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4938 if (VT->getElementType()->isIntegerTy(16)) {
4941 Val = Builder.CreateBitCast(Val, AsBF16);
4949 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4951 unsigned AddrSpace = PtrTy->getAddressSpace();
4954 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4956 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4961 MDNode *RangeNotPrivate =
4964 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4970 return Builder.CreateBitCast(RMW, RetTy);
4991 return MAV->getMetadata();
5000 if (Name ==
"label") {
5002 }
else if (Name ==
"assign") {
5009 }
else if (Name ==
"declare") {
5013 }
else if (Name ==
"addr") {
5023 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr);
5024 }
else if (Name ==
"value") {
5027 unsigned ExprOp = 2;
5042 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
5050 int64_t OffsetVal =
Offset->getSExtValue();
5051 return Builder.CreateIntrinsic(OffsetVal >= 0
5052 ? Intrinsic::vector_splice_left
5053 : Intrinsic::vector_splice_right,
5055 {CI->getArgOperand(0), CI->getArgOperand(1),
5056 Builder.getInt32(std::abs(OffsetVal))});
5061 if (Name.starts_with(
"to.fp16")) {
5063 Builder.CreateFPTrunc(CI->
getArgOperand(0), Builder.getHalfTy());
5064 return Builder.CreateBitCast(Cast, CI->
getType());
5067 if (Name.starts_with(
"from.fp16")) {
5069 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
5070 return Builder.CreateFPExt(Cast, CI->
getType());
5095 if (!Name.consume_front(
"llvm."))
5098 bool IsX86 = Name.consume_front(
"x86.");
5099 bool IsNVVM = Name.consume_front(
"nvvm.");
5100 bool IsAArch64 = Name.consume_front(
"aarch64.");
5101 bool IsARM = Name.consume_front(
"arm.");
5102 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
5103 bool IsDbg = Name.consume_front(
"dbg.");
5105 (Name.consume_front(
"experimental.vector.splice") ||
5106 Name.consume_front(
"vector.splice")) &&
5107 !(Name.starts_with(
".left") || Name.starts_with(
".right"));
5108 Value *Rep =
nullptr;
5110 if (!IsX86 && Name ==
"stackprotectorcheck") {
5112 }
else if (IsNVVM) {
5116 }
else if (IsAArch64) {
5120 }
else if (IsAMDGCN) {
5124 }
else if (IsOldSplice) {
5126 }
else if (Name.consume_front(
"convert.")) {
5138 const auto &DefaultCase = [&]() ->
void {
5146 "Unknown function for CallBase upgrade and isn't just a name change");
5154 "Return type must have changed");
5155 assert(OldST->getNumElements() ==
5157 "Must have same number of elements");
5160 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5163 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5164 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5165 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5184 case Intrinsic::arm_neon_vst1:
5185 case Intrinsic::arm_neon_vst2:
5186 case Intrinsic::arm_neon_vst3:
5187 case Intrinsic::arm_neon_vst4:
5188 case Intrinsic::arm_neon_vst2lane:
5189 case Intrinsic::arm_neon_vst3lane:
5190 case Intrinsic::arm_neon_vst4lane: {
5192 NewCall = Builder.CreateCall(NewFn, Args);
5195 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5196 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5197 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5202 NewCall = Builder.CreateCall(NewFn, Args);
5205 case Intrinsic::aarch64_sve_ld3_sret:
5206 case Intrinsic::aarch64_sve_ld4_sret:
5207 case Intrinsic::aarch64_sve_ld2_sret: {
5215 Name = Name.substr(5);
5222 unsigned MinElts = RetTy->getMinNumElements() /
N;
5224 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5226 for (
unsigned I = 0;
I <
N;
I++) {
5227 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
5228 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
5234 case Intrinsic::coro_end: {
5237 NewCall = Builder.CreateCall(NewFn, Args);
5241 case Intrinsic::vector_extract: {
5243 Name = Name.substr(5);
5244 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
5249 unsigned MinElts = RetTy->getMinNumElements();
5252 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
5256 case Intrinsic::vector_insert: {
5258 Name = Name.substr(5);
5259 if (!Name.starts_with(
"aarch64.sve.tuple")) {
5263 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
5268 NewCall = Builder.CreateCall(
5272 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
5278 assert(
N > 1 &&
"Create is expected to be between 2-4");
5281 unsigned MinElts = RetTy->getMinNumElements() /
N;
5282 for (
unsigned I = 0;
I <
N;
I++) {
5284 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
5291 case Intrinsic::arm_neon_bfdot:
5292 case Intrinsic::arm_neon_bfmmla:
5293 case Intrinsic::arm_neon_bfmlalb:
5294 case Intrinsic::arm_neon_bfmlalt:
5295 case Intrinsic::aarch64_neon_bfdot:
5296 case Intrinsic::aarch64_neon_bfmmla:
5297 case Intrinsic::aarch64_neon_bfmlalb:
5298 case Intrinsic::aarch64_neon_bfmlalt: {
5301 "Mismatch between function args and call args");
5302 size_t OperandWidth =
5304 assert((OperandWidth == 64 || OperandWidth == 128) &&
5305 "Unexpected operand width");
5307 auto Iter = CI->
args().begin();
5308 Args.push_back(*Iter++);
5309 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5310 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5311 NewCall = Builder.CreateCall(NewFn, Args);
5315 case Intrinsic::bitreverse:
5316 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5319 case Intrinsic::ctlz:
5320 case Intrinsic::cttz: {
5327 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
5331 case Intrinsic::objectsize: {
5332 Value *NullIsUnknownSize =
5336 NewCall = Builder.CreateCall(
5341 case Intrinsic::ctpop:
5342 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5344 case Intrinsic::dbg_value: {
5346 Name = Name.substr(5);
5348 if (Name.starts_with(
"dbg.addr")) {
5362 if (
Offset->isNullValue()) {
5363 NewCall = Builder.CreateCall(
5372 case Intrinsic::ptr_annotation:
5380 NewCall = Builder.CreateCall(
5389 case Intrinsic::var_annotation:
5396 NewCall = Builder.CreateCall(
5405 case Intrinsic::riscv_aes32dsi:
5406 case Intrinsic::riscv_aes32dsmi:
5407 case Intrinsic::riscv_aes32esi:
5408 case Intrinsic::riscv_aes32esmi:
5409 case Intrinsic::riscv_sm4ks:
5410 case Intrinsic::riscv_sm4ed: {
5420 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5421 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5427 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5428 Value *Res = NewCall;
5430 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5436 case Intrinsic::nvvm_mapa_shared_cluster: {
5440 Value *Res = NewCall;
5441 Res = Builder.CreateAddrSpaceCast(
5448 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5449 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5452 Args[0] = Builder.CreateAddrSpaceCast(
5455 NewCall = Builder.CreateCall(NewFn, Args);
5461 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5462 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5463 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5464 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5465 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5466 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5467 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5468 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5475 Args[0] = Builder.CreateAddrSpaceCast(
5484 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5486 NewCall = Builder.CreateCall(NewFn, Args);
5492 case Intrinsic::riscv_sha256sig0:
5493 case Intrinsic::riscv_sha256sig1:
5494 case Intrinsic::riscv_sha256sum0:
5495 case Intrinsic::riscv_sha256sum1:
5496 case Intrinsic::riscv_sm3p0:
5497 case Intrinsic::riscv_sm3p1: {
5504 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5506 NewCall = Builder.CreateCall(NewFn, Arg);
5508 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5515 case Intrinsic::x86_xop_vfrcz_ss:
5516 case Intrinsic::x86_xop_vfrcz_sd:
5517 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5520 case Intrinsic::x86_xop_vpermil2pd:
5521 case Intrinsic::x86_xop_vpermil2ps:
5522 case Intrinsic::x86_xop_vpermil2pd_256:
5523 case Intrinsic::x86_xop_vpermil2ps_256: {
5527 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5528 NewCall = Builder.CreateCall(NewFn, Args);
5532 case Intrinsic::x86_sse41_ptestc:
5533 case Intrinsic::x86_sse41_ptestz:
5534 case Intrinsic::x86_sse41_ptestnzc: {
5548 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5549 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5551 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5555 case Intrinsic::x86_rdtscp: {
5561 NewCall = Builder.CreateCall(NewFn);
5563 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5566 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5574 case Intrinsic::x86_sse41_insertps:
5575 case Intrinsic::x86_sse41_dppd:
5576 case Intrinsic::x86_sse41_dpps:
5577 case Intrinsic::x86_sse41_mpsadbw:
5578 case Intrinsic::x86_avx_dp_ps_256:
5579 case Intrinsic::x86_avx2_mpsadbw: {
5585 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5586 NewCall = Builder.CreateCall(NewFn, Args);
5590 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5591 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5592 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5593 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5594 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5595 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5601 NewCall = Builder.CreateCall(NewFn, Args);
5610 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5611 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5612 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5613 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5614 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5615 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5619 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5620 Args[1] = Builder.CreateBitCast(
5623 NewCall = Builder.CreateCall(NewFn, Args);
5624 Value *Res = Builder.CreateBitCast(
5632 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5633 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5634 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5638 Args[1] = Builder.CreateBitCast(
5640 Args[2] = Builder.CreateBitCast(
5643 NewCall = Builder.CreateCall(NewFn, Args);
5647 case Intrinsic::thread_pointer: {
5648 NewCall = Builder.CreateCall(NewFn, {});
5652 case Intrinsic::memcpy:
5653 case Intrinsic::memmove:
5654 case Intrinsic::memset: {
5670 NewCall = Builder.CreateCall(NewFn, Args);
5672 AttributeList NewAttrs = AttributeList::get(
5673 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5674 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5675 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5680 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5683 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5687 case Intrinsic::masked_load:
5688 case Intrinsic::masked_gather:
5689 case Intrinsic::masked_store:
5690 case Intrinsic::masked_scatter: {
5696 auto GetMaybeAlign = [](
Value *
Op) {
5706 auto GetAlign = [&](
Value *
Op) {
5715 case Intrinsic::masked_load:
5716 NewCall = Builder.CreateMaskedLoad(
5720 case Intrinsic::masked_gather:
5721 NewCall = Builder.CreateMaskedGather(
5727 case Intrinsic::masked_store:
5728 NewCall = Builder.CreateMaskedStore(
5732 case Intrinsic::masked_scatter:
5733 NewCall = Builder.CreateMaskedScatter(
5735 DL.getValueOrABITypeAlignment(
5749 case Intrinsic::lifetime_start:
5750 case Intrinsic::lifetime_end: {
5762 NewCall = Builder.CreateLifetimeStart(Ptr);
5764 NewCall = Builder.CreateLifetimeEnd(Ptr);
5773 case Intrinsic::x86_avx512_vpdpbusd_128:
5774 case Intrinsic::x86_avx512_vpdpbusd_256:
5775 case Intrinsic::x86_avx512_vpdpbusd_512:
5776 case Intrinsic::x86_avx512_vpdpbusds_128:
5777 case Intrinsic::x86_avx512_vpdpbusds_256:
5778 case Intrinsic::x86_avx512_vpdpbusds_512:
5779 case Intrinsic::x86_avx2_vpdpbssd_128:
5780 case Intrinsic::x86_avx2_vpdpbssd_256:
5781 case Intrinsic::x86_avx10_vpdpbssd_512:
5782 case Intrinsic::x86_avx2_vpdpbssds_128:
5783 case Intrinsic::x86_avx2_vpdpbssds_256:
5784 case Intrinsic::x86_avx10_vpdpbssds_512:
5785 case Intrinsic::x86_avx2_vpdpbsud_128:
5786 case Intrinsic::x86_avx2_vpdpbsud_256:
5787 case Intrinsic::x86_avx10_vpdpbsud_512:
5788 case Intrinsic::x86_avx2_vpdpbsuds_128:
5789 case Intrinsic::x86_avx2_vpdpbsuds_256:
5790 case Intrinsic::x86_avx10_vpdpbsuds_512:
5791 case Intrinsic::x86_avx2_vpdpbuud_128:
5792 case Intrinsic::x86_avx2_vpdpbuud_256:
5793 case Intrinsic::x86_avx10_vpdpbuud_512:
5794 case Intrinsic::x86_avx2_vpdpbuuds_128:
5795 case Intrinsic::x86_avx2_vpdpbuuds_256:
5796 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5801 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5802 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5804 NewCall = Builder.CreateCall(NewFn, Args);
5807 case Intrinsic::x86_avx512_vpdpwssd_128:
5808 case Intrinsic::x86_avx512_vpdpwssd_256:
5809 case Intrinsic::x86_avx512_vpdpwssd_512:
5810 case Intrinsic::x86_avx512_vpdpwssds_128:
5811 case Intrinsic::x86_avx512_vpdpwssds_256:
5812 case Intrinsic::x86_avx512_vpdpwssds_512:
5813 case Intrinsic::x86_avx2_vpdpwsud_128:
5814 case Intrinsic::x86_avx2_vpdpwsud_256:
5815 case Intrinsic::x86_avx10_vpdpwsud_512:
5816 case Intrinsic::x86_avx2_vpdpwsuds_128:
5817 case Intrinsic::x86_avx2_vpdpwsuds_256:
5818 case Intrinsic::x86_avx10_vpdpwsuds_512:
5819 case Intrinsic::x86_avx2_vpdpwusd_128:
5820 case Intrinsic::x86_avx2_vpdpwusd_256:
5821 case Intrinsic::x86_avx10_vpdpwusd_512:
5822 case Intrinsic::x86_avx2_vpdpwusds_128:
5823 case Intrinsic::x86_avx2_vpdpwusds_256:
5824 case Intrinsic::x86_avx10_vpdpwusds_512:
5825 case Intrinsic::x86_avx2_vpdpwuud_128:
5826 case Intrinsic::x86_avx2_vpdpwuud_256:
5827 case Intrinsic::x86_avx10_vpdpwuud_512:
5828 case Intrinsic::x86_avx2_vpdpwuuds_128:
5829 case Intrinsic::x86_avx2_vpdpwuuds_256:
5830 case Intrinsic::x86_avx10_vpdpwuuds_512:
5835 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5836 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5838 NewCall = Builder.CreateCall(NewFn, Args);
5841 assert(NewCall &&
"Should have either set this variable or returned through "
5842 "the default case");
5849 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5863 F->eraseFromParent();
5869 if (NumOperands == 0)
5877 if (NumOperands == 3) {
5881 Metadata *Elts2[] = {ScalarType, ScalarType,
5895 if (
Opc != Instruction::BitCast)
5899 Type *SrcTy = V->getType();
5916 if (
Opc != Instruction::BitCast)
5919 Type *SrcTy =
C->getType();
5946 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5947 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5948 if (Flag->getNumOperands() < 3)
5950 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5951 return K->getString() ==
"Debug Info Version";
5954 if (OpIt != ModFlags->op_end()) {
5955 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5962 bool BrokenDebugInfo =
false;
5965 if (!BrokenDebugInfo)
5971 M.getContext().diagnose(Diag);
5978 M.getContext().diagnose(DiagVersion);
5988 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5991 if (
F->hasFnAttribute(Attr)) {
5994 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5996 auto [Part, Rest] = S.
split(
',');
6002 const unsigned Dim = DimC -
'x';
6003 assert(Dim < 3 &&
"Unexpected dim char");
6013 F->addFnAttr(Attr, NewAttr);
6017 return S ==
"x" || S ==
"y" || S ==
"z";
6022 if (K ==
"kernel") {
6034 const unsigned Idx = (AlignIdxValuePair >> 16);
6035 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
6040 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
6045 if (K ==
"minctasm") {
6050 if (K ==
"maxnreg") {
6055 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
6059 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
6063 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
6067 if (K ==
"grid_constant") {
6082 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
6089 if (!SeenNodes.
insert(MD).second)
6096 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
6103 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6105 const MDOperand &V = MD->getOperand(j + 1);
6108 NewOperands.
append({K, V});
6111 if (NewOperands.
size() > 1)
6124 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
6125 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6126 if (ModRetainReleaseMarker) {
6132 ID->getString().split(ValueComp,
"#");
6133 if (ValueComp.
size() == 2) {
6134 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
6138 M.eraseNamedMetadata(ModRetainReleaseMarker);
6149 auto UpgradeToIntrinsic = [&](
const char *OldFunc,
6175 bool InvalidCast =
false;
6177 for (
unsigned I = 0, E = CI->
arg_size();
I != E; ++
I) {
6190 Arg = Builder.CreateBitCast(Arg, NewFuncTy->
getParamType(
I));
6192 Args.push_back(Arg);
6199 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6204 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->
getType());
6217 UpgradeToIntrinsic(
"clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6225 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6226 {
"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6227 {
"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6228 {
"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6229 {
"objc_autoreleaseReturnValue",
6230 llvm::Intrinsic::objc_autoreleaseReturnValue},
6231 {
"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6232 {
"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6233 {
"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6234 {
"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6235 {
"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6236 {
"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6237 {
"objc_release", llvm::Intrinsic::objc_release},
6238 {
"objc_retain", llvm::Intrinsic::objc_retain},
6239 {
"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6240 {
"objc_retainAutoreleaseReturnValue",
6241 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6242 {
"objc_retainAutoreleasedReturnValue",
6243 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6244 {
"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6245 {
"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6246 {
"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6247 {
"objc_unsafeClaimAutoreleasedReturnValue",
6248 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6249 {
"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6250 {
"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6251 {
"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6252 {
"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6253 {
"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6254 {
"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6255 {
"objc_arc_annotation_topdown_bbstart",
6256 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6257 {
"objc_arc_annotation_topdown_bbend",
6258 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6259 {
"objc_arc_annotation_bottomup_bbstart",
6260 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6261 {
"objc_arc_annotation_bottomup_bbend",
6262 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6264 for (
auto &
I : RuntimeFuncs)
6265 UpgradeToIntrinsic(
I.first,
I.second);
6269 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6273 bool HasObjCFlag =
false, HasClassProperties =
false,
Changed =
false;
6274 bool HasSwiftVersionFlag =
false;
6275 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6282 if (
Op->getNumOperands() != 3)
6296 if (
ID->getString() ==
"Objective-C Image Info Version")
6298 if (
ID->getString() ==
"Objective-C Class Properties")
6299 HasClassProperties =
true;
6301 if (
ID->getString() ==
"PIC Level") {
6302 if (
auto *Behavior =
6304 uint64_t V = Behavior->getLimitedValue();
6310 if (
ID->getString() ==
"PIE Level")
6311 if (
auto *Behavior =
6318 if (
ID->getString() ==
"branch-target-enforcement" ||
6319 ID->getString().starts_with(
"sign-return-address")) {
6320 if (
auto *Behavior =
6326 Op->getOperand(1),
Op->getOperand(2)};
6336 if (
ID->getString() ==
"Objective-C Image Info Section") {
6339 Value->getString().split(ValueComp,
" ");
6340 if (ValueComp.
size() != 1) {
6341 std::string NewValue;
6342 for (
auto &S : ValueComp)
6343 NewValue += S.str();
6354 if (
ID->getString() ==
"Objective-C Garbage Collection") {
6357 assert(Md->getValue() &&
"Expected non-empty metadata");
6358 auto Type = Md->getValue()->getType();
6361 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6362 if ((Val & 0xff) != Val) {
6363 HasSwiftVersionFlag =
true;
6364 SwiftABIVersion = (Val & 0xff00) >> 8;
6365 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6366 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6377 if (
ID->getString() ==
"amdgpu_code_object_version") {
6380 MDString::get(M.getContext(),
"amdhsa_code_object_version"),
6392 if (HasObjCFlag && !HasClassProperties) {
6398 if (HasSwiftVersionFlag) {
6402 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6404 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6412 NamedMDNode *CFIConsts = M.getNamedMetadata(
"cfi.functions");
6416 auto MatchesVersion = [](
const MDNode *
Op) {
6417 return Op->getNumOperands() >= 3 &&
6431 assert(!MatchesVersion(
Op) &&
"Unexpected mix of CFIConstant formats");
6432 assert(
Op->getNumOperands() >= 2 &&
6433 "Expected at least 2 operands - name and linkage type");
6445 for (
unsigned J = 2, EJ =
Op->getNumOperands(); J != EJ; ++J)
6456 auto TrimSpaces = [](
StringRef Section) -> std::string {
6458 Section.split(Components,
',');
6463 for (
auto Component : Components)
6464 OS <<
',' << Component.trim();
6469 for (
auto &GV : M.globals()) {
6470 if (!GV.hasSection())
6475 if (!Section.starts_with(
"__DATA, __objc_catlist"))
6480 GV.setSection(TrimSpaces(Section));
6496struct StrictFPUpgradeVisitor :
public InstVisitor<StrictFPUpgradeVisitor> {
6497 StrictFPUpgradeVisitor() =
default;
6500 if (!
Call.isStrictFP())
6506 Call.removeFnAttr(Attribute::StrictFP);
6507 Call.addFnAttr(Attribute::NoBuiltin);
6512struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6513 :
public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6514 AMDGPUUnsafeFPAtomicsUpgradeVisitor() =
default;
6516 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6531 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
6532 StrictFPUpgradeVisitor SFPV;
6537 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6538 F.getReturnType(),
F.getAttributes().getRetAttrs()));
6539 for (
auto &Arg :
F.args())
6541 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6543 bool AddingAttrs =
false, RemovingAttrs =
false;
6544 AttrBuilder AttrsToAdd(
F.getContext());
6549 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
6550 A.isValid() &&
A.isStringAttribute()) {
6551 F.setSection(
A.getValueAsString());
6553 RemovingAttrs =
true;
6557 A.isValid() &&
A.isStringAttribute()) {
6560 AddingAttrs = RemovingAttrs =
true;
6563 if (
Attribute A =
F.getFnAttribute(
"uniform-work-group-size");
6564 A.isValid() &&
A.isStringAttribute() && !
A.getValueAsString().empty()) {
6566 RemovingAttrs =
true;
6567 if (
A.getValueAsString() ==
"true") {
6568 AttrsToAdd.addAttribute(
"uniform-work-group-size");
6577 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
6580 if (
A.getValueAsBool()) {
6581 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6587 AttrsToRemove.
addAttribute(
"amdgpu-unsafe-fp-atomics");
6588 RemovingAttrs =
true;
6595 bool HandleDenormalMode =
false;
6597 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math"); Attr.isValid()) {
6600 DenormalFPMath = ParsedMode;
6602 AddingAttrs = RemovingAttrs =
true;
6603 HandleDenormalMode =
true;
6607 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math-f32");
6611 DenormalFPMathF32 = ParsedMode;
6613 AddingAttrs = RemovingAttrs =
true;
6614 HandleDenormalMode =
true;
6618 if (HandleDenormalMode)
6619 AttrsToAdd.addDenormalFPEnvAttr(
6623 F.removeFnAttrs(AttrsToRemove);
6626 F.addFnAttrs(AttrsToAdd);
6632 if (!
F.hasFnAttribute(FnAttrName))
6633 F.addFnAttr(FnAttrName,
Value);
6640 if (!
F.hasFnAttribute(FnAttrName)) {
6642 F.addFnAttr(FnAttrName);
6644 auto A =
F.getFnAttribute(FnAttrName);
6645 if (
"false" ==
A.getValueAsString())
6646 F.removeFnAttr(FnAttrName);
6647 else if (
"true" ==
A.getValueAsString()) {
6648 F.removeFnAttr(FnAttrName);
6649 F.addFnAttr(FnAttrName);
6655 Triple T(M.getTargetTriple());
6656 if (!
T.isThumb() && !
T.isARM() && !
T.isAArch64())
6666 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6670 if (
Op->getNumOperands() != 3)
6679 uint64_t *ValPtr = IDStr ==
"branch-target-enforcement" ? &BTEValue
6680 : IDStr ==
"branch-protection-pauth-lr" ? &BPPLRValue
6681 : IDStr ==
"guarded-control-stack" ? &GCSValue
6682 : IDStr ==
"sign-return-address" ? &SRAValue
6683 : IDStr ==
"sign-return-address-all" ? &SRAALLValue
6684 : IDStr ==
"sign-return-address-with-bkey"
6690 *ValPtr = CI->getZExtValue();
6696 bool BTE = BTEValue == 1;
6697 bool BPPLR = BPPLRValue == 1;
6698 bool GCS = GCSValue == 1;
6699 bool SRA = SRAValue == 1;
6702 if (SRA && SRAALLValue == 1)
6703 SignTypeValue =
"all";
6706 if (SRA && SRABKeyValue == 1)
6707 SignKeyValue =
"b_key";
6709 for (
Function &
F : M.getFunctionList()) {
6710 if (
F.isDeclaration())
6717 if (
auto A =
F.getFnAttribute(
"sign-return-address");
6718 A.isValid() &&
"none" ==
A.getValueAsString()) {
6719 F.removeFnAttr(
"sign-return-address");
6720 F.removeFnAttr(
"sign-return-address-key");
6736 if (SRAALLValue == 1)
6738 if (SRABKeyValue == 1)
6747 if (
T->getNumOperands() < 1)
6752 return S->getString().starts_with(
"llvm.vectorizer.");
6756 StringRef OldPrefix =
"llvm.vectorizer.";
6759 if (OldTag ==
"llvm.vectorizer.unroll")
6771 if (
T->getNumOperands() < 1)
6776 if (!OldTag->getString().starts_with(
"llvm.vectorizer."))
6781 Ops.reserve(
T->getNumOperands());
6783 for (
unsigned I = 1,
E =
T->getNumOperands();
I !=
E; ++
I)
6784 Ops.push_back(
T->getOperand(
I));
6798 Ops.reserve(
T->getNumOperands());
6809 if ((
T.isSPIR() || (
T.isSPIRV() && !
T.isSPIRVLogical())) &&
6810 !
DL.contains(
"-G") && !
DL.starts_with(
"G")) {
6811 return DL.empty() ? std::string(
"G1") : (
DL +
"-G1").str();
6814 if (
T.isLoongArch64() ||
T.isRISCV64()) {
6816 auto I =
DL.find(
"-n64-");
6818 return (
DL.take_front(
I) +
"-n32:64-" +
DL.drop_front(
I + 5)).str();
6823 std::string Res =
DL.str();
6826 if (!
DL.contains(
"-G") && !
DL.starts_with(
"G"))
6827 Res.append(Res.empty() ?
"G1" :
"-G1");
6835 if (!
DL.contains(
"-ni") && !
DL.starts_with(
"ni"))
6836 Res.append(
"-ni:7:8:9");
6838 if (
DL.ends_with(
"ni:7"))
6840 if (
DL.ends_with(
"ni:7:8"))
6845 if (!
DL.contains(
"-p7") && !
DL.starts_with(
"p7"))
6846 Res.append(
"-p7:160:256:256:32");
6847 if (!
DL.contains(
"-p8") && !
DL.starts_with(
"p8"))
6848 Res.append(
"-p8:128:128:128:48");
6849 constexpr StringRef OldP8(
"-p8:128:128-");
6850 if (
DL.contains(OldP8))
6851 Res.replace(Res.find(OldP8), OldP8.
size(),
"-p8:128:128:128:48-");
6852 if (!
DL.contains(
"-p9") && !
DL.starts_with(
"p9"))
6853 Res.append(
"-p9:192:256:256:32");
6857 if (!
DL.contains(
"m:e"))
6858 Res = Res.empty() ?
"m:e" :
"m:e-" + Res;
6863 if (
T.isSystemZ() && !
DL.empty()) {
6865 if (!
DL.contains(
"-S64"))
6866 return "E-S64" +
DL.drop_front(1).str();
6870 auto AddPtr32Ptr64AddrSpaces = [&
DL, &Res]() {
6873 StringRef AddrSpaces{
"-p270:32:32-p271:32:32-p272:64:64"};
6874 if (!
DL.contains(AddrSpaces)) {
6876 Regex R(
"^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6877 if (R.match(Res, &
Groups))
6883 if (
T.isAArch64()) {
6885 if (!
DL.empty() && !
DL.contains(
"-Fn32"))
6886 Res.append(
"-Fn32");
6887 AddPtr32Ptr64AddrSpaces();
6891 if (
T.isSPARC() || (
T.isMIPS64() && !
DL.contains(
"m:m")) ||
T.isPPC64() ||
6895 std::string I64 =
"-i64:64";
6896 std::string I128 =
"-i128:128";
6898 size_t Pos = Res.find(I64);
6899 if (Pos !=
size_t(-1))
6900 Res.insert(Pos + I64.size(), I128);
6904 if (
T.isPPC() &&
T.isOSAIX() && !
DL.contains(
"f64:32:64") && !
DL.empty()) {
6905 size_t Pos = Res.find(
"-S128");
6908 Res.insert(Pos,
"-f64:32:64");
6914 AddPtr32Ptr64AddrSpaces();
6922 if (!
T.isOSIAMCU()) {
6923 std::string I128 =
"-i128:128";
6926 Regex R(
"^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6927 if (R.match(Res, &
Groups))
6935 if (
T.isWindowsMSVCEnvironment() && !
T.isArch64Bit()) {
6937 auto I =
Ref.find(
"-f80:32-");
6939 Res = (
Ref.take_front(
I) +
"-f80:128-" +
Ref.drop_front(
I + 8)).str();
6947 Attribute A =
B.getAttribute(
"no-frame-pointer-elim");
6950 FramePointer =
A.getValueAsString() ==
"true" ?
"all" :
"none";
6951 B.removeAttribute(
"no-frame-pointer-elim");
6953 if (
B.contains(
"no-frame-pointer-elim-non-leaf")) {
6955 if (FramePointer !=
"all")
6956 FramePointer =
"non-leaf";
6957 B.removeAttribute(
"no-frame-pointer-elim-non-leaf");
6959 if (!FramePointer.
empty())
6960 B.addAttribute(
"frame-pointer", FramePointer);
6962 A =
B.getAttribute(
"null-pointer-is-valid");
6965 bool NullPointerIsValid =
A.getValueAsString() ==
"true";
6966 B.removeAttribute(
"null-pointer-is-valid");
6967 if (NullPointerIsValid)
6968 B.addAttribute(Attribute::NullPointerIsValid);
6971 A =
B.getAttribute(
"uniform-work-group-size");
6975 bool IsTrue = Val ==
"true";
6976 B.removeAttribute(
"uniform-work-group-size");
6978 B.addAttribute(
"uniform-work-group-size");
6989 return OBD.
getTag() ==
"clang.arc.attachedcall" &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
void setDebugLoc(DebugLoc Loc)
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
LinkageTypes getLinkage() const
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
static StringRef dropLLVMManglingEscape(StringRef Name)
If the given string begins with the GlobalValue name mangling escape character '\1',...
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
LLVM_ABI SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
LLVM_ABI StringRef getString() const
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
LLVM_ABI bool hasStructReturnType(ID id)
Returns true if id has a struct return type.
@ ADDRESS_SPACE_SHARED_CLUSTER
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI bool UpgradeCFIFunctionsMetadata(Module &M)
Upgrade the cfi.functions metadata node by calculating and inserting the GUID for each function entry...
LLVM_ABI void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ Default
The result value is uniform if and only if all operands are uniform.
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.