34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
63 cl::desc(
"Disable autoupgrade of debug info"));
82 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
97 Type *LastArgType =
F->getFunctionType()->getParamType(
98 F->getFunctionType()->getNumParams() - 1);
113 if (
F->getReturnType()->isVectorTy())
126 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
127 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
144 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
145 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
159 if (
F->getReturnType()->getScalarType()->isBFloatTy())
169 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
183 if (Name.consume_front(
"avx."))
184 return (Name.starts_with(
"blend.p") ||
185 Name ==
"cvt.ps2.pd.256" ||
186 Name ==
"cvtdq2.pd.256" ||
187 Name ==
"cvtdq2.ps.256" ||
188 Name.starts_with(
"movnt.") ||
189 Name.starts_with(
"sqrt.p") ||
190 Name.starts_with(
"storeu.") ||
191 Name.starts_with(
"vbroadcast.s") ||
192 Name.starts_with(
"vbroadcastf128") ||
193 Name.starts_with(
"vextractf128.") ||
194 Name.starts_with(
"vinsertf128.") ||
195 Name.starts_with(
"vperm2f128.") ||
196 Name.starts_with(
"vpermil."));
198 if (Name.consume_front(
"avx2."))
199 return (Name ==
"movntdqa" ||
200 Name.starts_with(
"pabs.") ||
201 Name.starts_with(
"padds.") ||
202 Name.starts_with(
"paddus.") ||
203 Name.starts_with(
"pblendd.") ||
205 Name.starts_with(
"pbroadcast") ||
206 Name.starts_with(
"pcmpeq.") ||
207 Name.starts_with(
"pcmpgt.") ||
208 Name.starts_with(
"pmax") ||
209 Name.starts_with(
"pmin") ||
210 Name.starts_with(
"pmovsx") ||
211 Name.starts_with(
"pmovzx") ||
213 Name ==
"pmulu.dq" ||
214 Name.starts_with(
"psll.dq") ||
215 Name.starts_with(
"psrl.dq") ||
216 Name.starts_with(
"psubs.") ||
217 Name.starts_with(
"psubus.") ||
218 Name.starts_with(
"vbroadcast") ||
219 Name ==
"vbroadcasti128" ||
220 Name ==
"vextracti128" ||
221 Name ==
"vinserti128" ||
222 Name ==
"vperm2i128");
224 if (Name.consume_front(
"avx512.")) {
225 if (Name.consume_front(
"mask."))
227 return (Name.starts_with(
"add.p") ||
228 Name.starts_with(
"and.") ||
229 Name.starts_with(
"andn.") ||
230 Name.starts_with(
"broadcast.s") ||
231 Name.starts_with(
"broadcastf32x4.") ||
232 Name.starts_with(
"broadcastf32x8.") ||
233 Name.starts_with(
"broadcastf64x2.") ||
234 Name.starts_with(
"broadcastf64x4.") ||
235 Name.starts_with(
"broadcasti32x4.") ||
236 Name.starts_with(
"broadcasti32x8.") ||
237 Name.starts_with(
"broadcasti64x2.") ||
238 Name.starts_with(
"broadcasti64x4.") ||
239 Name.starts_with(
"cmp.b") ||
240 Name.starts_with(
"cmp.d") ||
241 Name.starts_with(
"cmp.q") ||
242 Name.starts_with(
"cmp.w") ||
243 Name.starts_with(
"compress.b") ||
244 Name.starts_with(
"compress.d") ||
245 Name.starts_with(
"compress.p") ||
246 Name.starts_with(
"compress.q") ||
247 Name.starts_with(
"compress.store.") ||
248 Name.starts_with(
"compress.w") ||
249 Name.starts_with(
"conflict.") ||
250 Name.starts_with(
"cvtdq2pd.") ||
251 Name.starts_with(
"cvtdq2ps.") ||
252 Name ==
"cvtpd2dq.256" ||
253 Name ==
"cvtpd2ps.256" ||
254 Name ==
"cvtps2pd.128" ||
255 Name ==
"cvtps2pd.256" ||
256 Name.starts_with(
"cvtqq2pd.") ||
257 Name ==
"cvtqq2ps.256" ||
258 Name ==
"cvtqq2ps.512" ||
259 Name ==
"cvttpd2dq.256" ||
260 Name ==
"cvttps2dq.128" ||
261 Name ==
"cvttps2dq.256" ||
262 Name.starts_with(
"cvtudq2pd.") ||
263 Name.starts_with(
"cvtudq2ps.") ||
264 Name.starts_with(
"cvtuqq2pd.") ||
265 Name ==
"cvtuqq2ps.256" ||
266 Name ==
"cvtuqq2ps.512" ||
267 Name.starts_with(
"dbpsadbw.") ||
268 Name.starts_with(
"div.p") ||
269 Name.starts_with(
"expand.b") ||
270 Name.starts_with(
"expand.d") ||
271 Name.starts_with(
"expand.load.") ||
272 Name.starts_with(
"expand.p") ||
273 Name.starts_with(
"expand.q") ||
274 Name.starts_with(
"expand.w") ||
275 Name.starts_with(
"fpclass.p") ||
276 Name.starts_with(
"insert") ||
277 Name.starts_with(
"load.") ||
278 Name.starts_with(
"loadu.") ||
279 Name.starts_with(
"lzcnt.") ||
280 Name.starts_with(
"max.p") ||
281 Name.starts_with(
"min.p") ||
282 Name.starts_with(
"movddup") ||
283 Name.starts_with(
"move.s") ||
284 Name.starts_with(
"movshdup") ||
285 Name.starts_with(
"movsldup") ||
286 Name.starts_with(
"mul.p") ||
287 Name.starts_with(
"or.") ||
288 Name.starts_with(
"pabs.") ||
289 Name.starts_with(
"packssdw.") ||
290 Name.starts_with(
"packsswb.") ||
291 Name.starts_with(
"packusdw.") ||
292 Name.starts_with(
"packuswb.") ||
293 Name.starts_with(
"padd.") ||
294 Name.starts_with(
"padds.") ||
295 Name.starts_with(
"paddus.") ||
296 Name.starts_with(
"palignr.") ||
297 Name.starts_with(
"pand.") ||
298 Name.starts_with(
"pandn.") ||
299 Name.starts_with(
"pavg") ||
300 Name.starts_with(
"pbroadcast") ||
301 Name.starts_with(
"pcmpeq.") ||
302 Name.starts_with(
"pcmpgt.") ||
303 Name.starts_with(
"perm.df.") ||
304 Name.starts_with(
"perm.di.") ||
305 Name.starts_with(
"permvar.") ||
306 Name.starts_with(
"pmaddubs.w.") ||
307 Name.starts_with(
"pmaddw.d.") ||
308 Name.starts_with(
"pmax") ||
309 Name.starts_with(
"pmin") ||
310 Name ==
"pmov.qd.256" ||
311 Name ==
"pmov.qd.512" ||
312 Name ==
"pmov.wb.256" ||
313 Name ==
"pmov.wb.512" ||
314 Name.starts_with(
"pmovsx") ||
315 Name.starts_with(
"pmovzx") ||
316 Name.starts_with(
"pmul.dq.") ||
317 Name.starts_with(
"pmul.hr.sw.") ||
318 Name.starts_with(
"pmulh.w.") ||
319 Name.starts_with(
"pmulhu.w.") ||
320 Name.starts_with(
"pmull.") ||
321 Name.starts_with(
"pmultishift.qb.") ||
322 Name.starts_with(
"pmulu.dq.") ||
323 Name.starts_with(
"por.") ||
324 Name.starts_with(
"prol.") ||
325 Name.starts_with(
"prolv.") ||
326 Name.starts_with(
"pror.") ||
327 Name.starts_with(
"prorv.") ||
328 Name.starts_with(
"pshuf.b.") ||
329 Name.starts_with(
"pshuf.d.") ||
330 Name.starts_with(
"pshufh.w.") ||
331 Name.starts_with(
"pshufl.w.") ||
332 Name.starts_with(
"psll.d") ||
333 Name.starts_with(
"psll.q") ||
334 Name.starts_with(
"psll.w") ||
335 Name.starts_with(
"pslli") ||
336 Name.starts_with(
"psllv") ||
337 Name.starts_with(
"psra.d") ||
338 Name.starts_with(
"psra.q") ||
339 Name.starts_with(
"psra.w") ||
340 Name.starts_with(
"psrai") ||
341 Name.starts_with(
"psrav") ||
342 Name.starts_with(
"psrl.d") ||
343 Name.starts_with(
"psrl.q") ||
344 Name.starts_with(
"psrl.w") ||
345 Name.starts_with(
"psrli") ||
346 Name.starts_with(
"psrlv") ||
347 Name.starts_with(
"psub.") ||
348 Name.starts_with(
"psubs.") ||
349 Name.starts_with(
"psubus.") ||
350 Name.starts_with(
"pternlog.") ||
351 Name.starts_with(
"punpckh") ||
352 Name.starts_with(
"punpckl") ||
353 Name.starts_with(
"pxor.") ||
354 Name.starts_with(
"shuf.f") ||
355 Name.starts_with(
"shuf.i") ||
356 Name.starts_with(
"shuf.p") ||
357 Name.starts_with(
"sqrt.p") ||
358 Name.starts_with(
"store.b.") ||
359 Name.starts_with(
"store.d.") ||
360 Name.starts_with(
"store.p") ||
361 Name.starts_with(
"store.q.") ||
362 Name.starts_with(
"store.w.") ||
363 Name ==
"store.ss" ||
364 Name.starts_with(
"storeu.") ||
365 Name.starts_with(
"sub.p") ||
366 Name.starts_with(
"ucmp.") ||
367 Name.starts_with(
"unpckh.") ||
368 Name.starts_with(
"unpckl.") ||
369 Name.starts_with(
"valign.") ||
370 Name ==
"vcvtph2ps.128" ||
371 Name ==
"vcvtph2ps.256" ||
372 Name.starts_with(
"vextract") ||
373 Name.starts_with(
"vfmadd.") ||
374 Name.starts_with(
"vfmaddsub.") ||
375 Name.starts_with(
"vfnmadd.") ||
376 Name.starts_with(
"vfnmsub.") ||
377 Name.starts_with(
"vpdpbusd.") ||
378 Name.starts_with(
"vpdpbusds.") ||
379 Name.starts_with(
"vpdpwssd.") ||
380 Name.starts_with(
"vpdpwssds.") ||
381 Name.starts_with(
"vpermi2var.") ||
382 Name.starts_with(
"vpermil.p") ||
383 Name.starts_with(
"vpermilvar.") ||
384 Name.starts_with(
"vpermt2var.") ||
385 Name.starts_with(
"vpmadd52") ||
386 Name.starts_with(
"vpshld.") ||
387 Name.starts_with(
"vpshldv.") ||
388 Name.starts_with(
"vpshrd.") ||
389 Name.starts_with(
"vpshrdv.") ||
390 Name.starts_with(
"vpshufbitqmb.") ||
391 Name.starts_with(
"xor."));
393 if (Name.consume_front(
"mask3."))
395 return (Name.starts_with(
"vfmadd.") ||
396 Name.starts_with(
"vfmaddsub.") ||
397 Name.starts_with(
"vfmsub.") ||
398 Name.starts_with(
"vfmsubadd.") ||
399 Name.starts_with(
"vfnmsub."));
401 if (Name.consume_front(
"maskz."))
403 return (Name.starts_with(
"pternlog.") ||
404 Name.starts_with(
"vfmadd.") ||
405 Name.starts_with(
"vfmaddsub.") ||
406 Name.starts_with(
"vpdpbusd.") ||
407 Name.starts_with(
"vpdpbusds.") ||
408 Name.starts_with(
"vpdpwssd.") ||
409 Name.starts_with(
"vpdpwssds.") ||
410 Name.starts_with(
"vpermt2var.") ||
411 Name.starts_with(
"vpmadd52") ||
412 Name.starts_with(
"vpshldv.") ||
413 Name.starts_with(
"vpshrdv."));
416 return (Name ==
"movntdqa" ||
417 Name ==
"pmul.dq.512" ||
418 Name ==
"pmulu.dq.512" ||
419 Name.starts_with(
"broadcastm") ||
420 Name.starts_with(
"cmp.p") ||
421 Name.starts_with(
"cvtb2mask.") ||
422 Name.starts_with(
"cvtd2mask.") ||
423 Name.starts_with(
"cvtmask2") ||
424 Name.starts_with(
"cvtq2mask.") ||
425 Name ==
"cvtusi2sd" ||
426 Name.starts_with(
"cvtw2mask.") ||
431 Name ==
"kortestc.w" ||
432 Name ==
"kortestz.w" ||
433 Name.starts_with(
"kunpck") ||
436 Name.starts_with(
"padds.") ||
437 Name.starts_with(
"pbroadcast") ||
438 Name.starts_with(
"prol") ||
439 Name.starts_with(
"pror") ||
440 Name.starts_with(
"psll.dq") ||
441 Name.starts_with(
"psrl.dq") ||
442 Name.starts_with(
"psubs.") ||
443 Name.starts_with(
"ptestm") ||
444 Name.starts_with(
"ptestnm") ||
445 Name.starts_with(
"storent.") ||
446 Name.starts_with(
"vbroadcast.s") ||
447 Name.starts_with(
"vpshld.") ||
448 Name.starts_with(
"vpshrd."));
451 if (Name.consume_front(
"fma."))
452 return (Name.starts_with(
"vfmadd.") ||
453 Name.starts_with(
"vfmsub.") ||
454 Name.starts_with(
"vfmsubadd.") ||
455 Name.starts_with(
"vfnmadd.") ||
456 Name.starts_with(
"vfnmsub."));
458 if (Name.consume_front(
"fma4."))
459 return Name.starts_with(
"vfmadd.s");
461 if (Name.consume_front(
"sse."))
462 return (Name ==
"add.ss" ||
463 Name ==
"cvtsi2ss" ||
464 Name ==
"cvtsi642ss" ||
467 Name.starts_with(
"sqrt.p") ||
469 Name.starts_with(
"storeu.") ||
472 if (Name.consume_front(
"sse2."))
473 return (Name ==
"add.sd" ||
474 Name ==
"cvtdq2pd" ||
475 Name ==
"cvtdq2ps" ||
476 Name ==
"cvtps2pd" ||
477 Name ==
"cvtsi2sd" ||
478 Name ==
"cvtsi642sd" ||
479 Name ==
"cvtss2sd" ||
482 Name.starts_with(
"padds.") ||
483 Name.starts_with(
"paddus.") ||
484 Name.starts_with(
"pcmpeq.") ||
485 Name.starts_with(
"pcmpgt.") ||
490 Name ==
"pmulu.dq" ||
491 Name.starts_with(
"pshuf") ||
492 Name.starts_with(
"psll.dq") ||
493 Name.starts_with(
"psrl.dq") ||
494 Name.starts_with(
"psubs.") ||
495 Name.starts_with(
"psubus.") ||
496 Name.starts_with(
"sqrt.p") ||
498 Name ==
"storel.dq" ||
499 Name.starts_with(
"storeu.") ||
502 if (Name.consume_front(
"sse41."))
503 return (Name.starts_with(
"blendp") ||
504 Name ==
"movntdqa" ||
514 Name.starts_with(
"pmovsx") ||
515 Name.starts_with(
"pmovzx") ||
518 if (Name.consume_front(
"sse42."))
519 return Name ==
"crc32.64.8";
521 if (Name.consume_front(
"sse4a."))
522 return Name.starts_with(
"movnt.");
524 if (Name.consume_front(
"ssse3."))
525 return (Name ==
"pabs.b.128" ||
526 Name ==
"pabs.d.128" ||
527 Name ==
"pabs.w.128");
529 if (Name.consume_front(
"xop."))
530 return (Name ==
"vpcmov" ||
531 Name ==
"vpcmov.256" ||
532 Name.starts_with(
"vpcom") ||
533 Name.starts_with(
"vprot"));
535 return (Name ==
"addcarry.u32" ||
536 Name ==
"addcarry.u64" ||
537 Name ==
"addcarryx.u32" ||
538 Name ==
"addcarryx.u64" ||
539 Name ==
"subborrow.u32" ||
540 Name ==
"subborrow.u64" ||
541 Name.starts_with(
"vcvtph2ps."));
547 if (!Name.consume_front(
"x86."))
555 if (Name ==
"rdtscp") {
557 if (
F->getFunctionType()->getNumParams() == 0)
562 Intrinsic::x86_rdtscp);
569 if (Name.consume_front(
"sse41.ptest")) {
571 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
572 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
573 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
586 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
587 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
588 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
589 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
590 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
591 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
596 if (Name.consume_front(
"avx512.")) {
597 if (Name.consume_front(
"mask.cmp.")) {
600 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
609 }
else if (Name.starts_with(
"vpdpbusd.") ||
610 Name.starts_with(
"vpdpbusds.")) {
613 .
Case(
"vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
614 .
Case(
"vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
615 .
Case(
"vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
616 .
Case(
"vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
617 .
Case(
"vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
618 .
Case(
"vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
622 }
else if (Name.starts_with(
"vpdpwssd.") ||
623 Name.starts_with(
"vpdpwssds.")) {
626 .
Case(
"vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
627 .
Case(
"vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
628 .
Case(
"vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
629 .
Case(
"vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
630 .
Case(
"vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
631 .
Case(
"vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
639 if (Name.consume_front(
"avx2.")) {
640 if (Name.consume_front(
"vpdpb")) {
643 .
Case(
"ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
644 .
Case(
"ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
645 .
Case(
"ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
646 .
Case(
"ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
647 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
648 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
649 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
650 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
651 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
652 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
653 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
654 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
658 }
else if (Name.consume_front(
"vpdpw")) {
661 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
662 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
663 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
664 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
665 .
Case(
"usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
666 .
Case(
"usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
667 .
Case(
"usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
668 .
Case(
"usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
669 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
670 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
671 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
672 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
680 if (Name.consume_front(
"avx10.")) {
681 if (Name.consume_front(
"vpdpb")) {
684 .
Case(
"ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
685 .
Case(
"ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
686 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
687 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
688 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
689 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
693 }
else if (Name.consume_front(
"vpdpw")) {
695 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
696 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
697 .
Case(
"usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
698 .
Case(
"usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
699 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
700 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
708 if (Name.consume_front(
"avx512bf16.")) {
711 .
Case(
"cvtne2ps2bf16.128",
712 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .
Case(
"cvtne2ps2bf16.256",
714 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .
Case(
"cvtne2ps2bf16.512",
716 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .
Case(
"mask.cvtneps2bf16.128",
718 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .
Case(
"cvtneps2bf16.256",
720 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .
Case(
"cvtneps2bf16.512",
722 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
729 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
738 if (Name.consume_front(
"xop.")) {
740 if (Name.starts_with(
"vpermil2")) {
743 auto Idx =
F->getFunctionType()->getParamType(2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
756 }
else if (
F->arg_size() == 2)
759 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
760 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
771 if (Name ==
"seh.recoverfp") {
773 Intrinsic::eh_recoverfp);
785 if (Name.starts_with(
"rbit")) {
788 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
792 if (Name ==
"thread.pointer") {
795 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
799 bool Neon = Name.consume_front(
"neon.");
804 if (Name.consume_front(
"bfdot.")) {
808 .
Cases({
"v2f32.v8i8",
"v4f32.v16i8"},
813 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
814 assert((OperandWidth == 64 || OperandWidth == 128) &&
815 "Unexpected operand width");
817 std::array<Type *, 2> Tys{
828 if (Name.consume_front(
"bfm")) {
830 if (Name.consume_back(
".v4f32.v16i8")) {
876 F->arg_begin()->getType());
880 if (Name.consume_front(
"vst")) {
882 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
886 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
887 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
890 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
891 Intrinsic::arm_neon_vst4lane};
893 auto fArgs =
F->getFunctionType()->params();
894 Type *Tys[] = {fArgs[0], fArgs[1]};
897 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
900 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
909 if (Name.consume_front(
"mve.")) {
911 if (Name ==
"vctp64") {
921 if (Name.starts_with(
"vrintn.v")) {
923 F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
928 if (Name.consume_back(
".v4i1")) {
930 if (Name.consume_back(
".predicated.v2i64.v4i32"))
932 return Name ==
"mull.int" || Name ==
"vqdmull";
934 if (Name.consume_back(
".v2i64")) {
936 bool IsGather = Name.consume_front(
"vldr.gather.");
937 if (IsGather || Name.consume_front(
"vstr.scatter.")) {
938 if (Name.consume_front(
"base.")) {
940 Name.consume_front(
"wb.");
943 return Name ==
"predicated.v2i64";
946 if (Name.consume_front(
"offset.predicated."))
947 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
948 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
961 if (Name.consume_front(
"cde.vcx")) {
963 if (Name.consume_back(
".predicated.v2i64.v4i1"))
965 return Name ==
"1q" || Name ==
"1qa" || Name ==
"2q" || Name ==
"2qa" ||
966 Name ==
"3q" || Name ==
"3qa";
980 F->arg_begin()->getType());
984 if (Name.starts_with(
"addp")) {
986 if (
F->arg_size() != 2)
989 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
991 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
997 if (Name.starts_with(
"bfcvt")) {
1004 if (Name.consume_front(
"sve.")) {
1006 if (Name.consume_front(
"bf")) {
1007 if (Name ==
"mmla") {
1008 Type *Tys[] = {
F->getReturnType(),
1009 std::next(
F->arg_begin())->getType()};
1011 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1014 if (Name.consume_back(
".lane")) {
1018 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1019 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1020 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1032 if (Name ==
"fcvt.bf16f32" || Name ==
"fcvtnt.bf16f32") {
1037 if (Name.consume_front(
"addqv")) {
1039 if (!
F->getReturnType()->isFPOrFPVectorTy())
1042 auto Args =
F->getFunctionType()->params();
1043 Type *Tys[] = {
F->getReturnType(), Args[1]};
1045 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1049 if (Name.consume_front(
"ld")) {
1051 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
1052 if (LdRegex.
match(Name)) {
1058 "Expected 2 arguments for ld* intrinsic.");
1059 Type *PtrTy =
F->getArg(1)->getType();
1062 Intrinsic::aarch64_sve_ld2_sret,
1063 Intrinsic::aarch64_sve_ld3_sret,
1064 Intrinsic::aarch64_sve_ld4_sret,
1067 F->getParent(), LoadIDs[Name[0] -
'2'], {Ty, PtrTy});
1073 if (Name.consume_front(
"tuple.")) {
1075 if (Name.starts_with(
"get")) {
1077 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
1079 F->getParent(), Intrinsic::vector_extract, Tys);
1083 if (Name.starts_with(
"set")) {
1085 auto Args =
F->getFunctionType()->params();
1086 Type *Tys[] = {Args[0], Args[2], Args[1]};
1088 F->getParent(), Intrinsic::vector_insert, Tys);
1092 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
1093 if (CreateTupleRegex.
match(Name)) {
1095 auto Args =
F->getFunctionType()->params();
1096 Type *Tys[] = {
F->getReturnType(), Args[1]};
1098 F->getParent(), Intrinsic::vector_insert, Tys);
1104 if (Name.starts_with(
"rev.nxv")) {
1107 F->getParent(), Intrinsic::vector_reverse,
F->getReturnType());
1119 if (Name.consume_front(
"cp.async.bulk.tensor.g2s.")) {
1123 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1125 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1127 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1128 .
Case(
"tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1129 .
Case(
"tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1130 .
Case(
"tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1131 .
Case(
"tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1132 .
Case(
"tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1141 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1155 size_t FlagStartIndex =
F->getFunctionType()->getNumParams() - 3;
1156 Type *ArgType =
F->getFunctionType()->getParamType(FlagStartIndex);
1166 if (Name.consume_front(
"mapa.shared.cluster"))
1167 if (
F->getReturnType()->getPointerAddressSpace() ==
1169 return Intrinsic::nvvm_mapa_shared_cluster;
1171 if (Name.consume_front(
"cp.async.bulk.")) {
1174 .
Case(
"global.to.shared.cluster",
1175 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1176 .
Case(
"shared.cta.to.cluster",
1177 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1181 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1190 if (Name.consume_front(
"fma.rn."))
1192 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
1193 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1194 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1195 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1198 if (Name.consume_front(
"fmax."))
1200 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
1201 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1202 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1203 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1204 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1205 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1206 .
Case(
"ftz.nan.xorsign.abs.bf16",
1207 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1208 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1209 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1210 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1211 .
Case(
"ftz.xorsign.abs.bf16x2",
1212 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1213 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1214 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1215 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1216 .
Case(
"nan.xorsign.abs.bf16x2",
1217 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1218 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1219 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1222 if (Name.consume_front(
"fmin."))
1224 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
1225 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1226 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1227 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1228 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1229 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1230 .
Case(
"ftz.nan.xorsign.abs.bf16",
1231 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1232 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1233 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1234 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1235 .
Case(
"ftz.xorsign.abs.bf16x2",
1236 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1237 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1238 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1239 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1240 .
Case(
"nan.xorsign.abs.bf16x2",
1241 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1242 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1243 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1246 if (Name.consume_front(
"neg."))
1248 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1249 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1256 return Name.consume_front(
"local") || Name.consume_front(
"shared") ||
1257 Name.consume_front(
"global") || Name.consume_front(
"constant") ||
1258 Name.consume_front(
"param");
1264 if (Name.starts_with(
"to.fp16")) {
1268 FuncTy->getReturnType());
1271 if (Name.starts_with(
"from.fp16")) {
1275 FuncTy->getReturnType());
1282 bool CanUpgradeDebugIntrinsicsToRecords) {
1283 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1288 if (!Name.consume_front(
"llvm.") || Name.empty())
1294 bool IsArm = Name.consume_front(
"arm.");
1295 if (IsArm || Name.consume_front(
"aarch64.")) {
1301 if (Name.consume_front(
"amdgcn.")) {
1302 if (Name ==
"alignbit") {
1305 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1309 if (Name.consume_front(
"atomic.")) {
1310 if (Name.starts_with(
"inc") || Name.starts_with(
"dec") ||
1311 Name.starts_with(
"cond.sub") || Name.starts_with(
"csub")) {
1320 switch (
F->getIntrinsicID()) {
1324 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1325 if (
F->arg_size() == 7) {
1330 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1331 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1332 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1333 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1334 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1335 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1336 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1337 if (
F->arg_size() == 8) {
1344 if (Name.consume_front(
"ds.") || Name.consume_front(
"global.atomic.") ||
1345 Name.consume_front(
"flat.atomic.")) {
1346 if (Name.starts_with(
"fadd") ||
1348 (Name.starts_with(
"fmin") && !Name.starts_with(
"fmin.num")) ||
1349 (Name.starts_with(
"fmax") && !Name.starts_with(
"fmax.num"))) {
1357 if (Name.starts_with(
"ldexp.")) {
1360 F->getParent(), Intrinsic::ldexp,
1361 {F->getReturnType(), F->getArg(1)->getType()});
1370 if (
F->arg_size() == 1) {
1371 if (Name.consume_front(
"convert.")) {
1385 F->arg_begin()->getType());
1390 if (
F->arg_size() == 2 && Name ==
"coro.end") {
1393 Intrinsic::coro_end);
1400 if (Name.consume_front(
"dbg.")) {
1402 if (CanUpgradeDebugIntrinsicsToRecords) {
1403 if (Name ==
"addr" || Name ==
"value" || Name ==
"assign" ||
1404 Name ==
"declare" || Name ==
"label") {
1413 if (Name ==
"addr" || (Name ==
"value" &&
F->arg_size() == 4)) {
1416 Intrinsic::dbg_value);
1423 if (Name.consume_front(
"experimental.vector.")) {
1429 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1430 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1431 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1432 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1433 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1435 Intrinsic::vector_partial_reduce_add)
1438 const auto *FT =
F->getFunctionType();
1440 if (
ID == Intrinsic::vector_extract ||
1441 ID == Intrinsic::vector_interleave2)
1444 if (
ID != Intrinsic::vector_interleave2)
1446 if (
ID == Intrinsic::vector_insert ||
1447 ID == Intrinsic::vector_partial_reduce_add)
1455 if (Name.consume_front(
"reduce.")) {
1457 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1458 if (R.match(Name, &
Groups))
1460 .
Case(
"add", Intrinsic::vector_reduce_add)
1461 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1462 .
Case(
"and", Intrinsic::vector_reduce_and)
1463 .
Case(
"or", Intrinsic::vector_reduce_or)
1464 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1465 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1466 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1467 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1468 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1469 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1470 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1475 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1480 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1481 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1486 auto Args =
F->getFunctionType()->params();
1488 {Args[V2 ? 1 : 0]});
1494 if (Name.consume_front(
"splice"))
1498 if (Name.consume_front(
"experimental.stepvector.")) {
1502 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1507 if (Name.starts_with(
"flt.rounds")) {
1510 Intrinsic::get_rounding);
1515 if (Name.starts_with(
"invariant.group.barrier")) {
1517 auto Args =
F->getFunctionType()->params();
1518 Type* ObjectPtr[1] = {Args[0]};
1521 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1526 if ((Name.starts_with(
"lifetime.start") ||
1527 Name.starts_with(
"lifetime.end")) &&
1528 F->arg_size() == 2) {
1530 ? Intrinsic::lifetime_start
1531 : Intrinsic::lifetime_end;
1534 F->getArg(0)->getType());
1543 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1544 .StartsWith(
"memmove.", Intrinsic::memmove)
1546 if (
F->arg_size() == 5) {
1550 F->getFunctionType()->params().slice(0, 3);
1556 if (Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1559 const auto *FT =
F->getFunctionType();
1560 Type *ParamTypes[2] = {
1561 FT->getParamType(0),
1565 Intrinsic::memset, ParamTypes);
1571 .
StartsWith(
"masked.load", Intrinsic::masked_load)
1572 .
StartsWith(
"masked.gather", Intrinsic::masked_gather)
1573 .
StartsWith(
"masked.store", Intrinsic::masked_store)
1574 .
StartsWith(
"masked.scatter", Intrinsic::masked_scatter)
1576 if (MaskedID &&
F->arg_size() == 4) {
1578 if (MaskedID == Intrinsic::masked_load ||
1579 MaskedID == Intrinsic::masked_gather) {
1581 F->getParent(), MaskedID,
1582 {F->getReturnType(), F->getArg(0)->getType()});
1586 F->getParent(), MaskedID,
1587 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1593 if (Name.consume_front(
"nvvm.")) {
1595 if (
F->arg_size() == 1) {
1598 .
Cases({
"brev32",
"brev64"}, Intrinsic::bitreverse)
1599 .Case(
"clz.i", Intrinsic::ctlz)
1600 .
Case(
"popc.i", Intrinsic::ctpop)
1604 {F->getReturnType()});
1607 }
else if (
F->arg_size() == 2) {
1610 .
Cases({
"max.s",
"max.i",
"max.ll"}, Intrinsic::smax)
1611 .Cases({
"min.s",
"min.i",
"min.ll"}, Intrinsic::smin)
1612 .Cases({
"max.us",
"max.ui",
"max.ull"}, Intrinsic::umax)
1613 .Cases({
"min.us",
"min.ui",
"min.ull"}, Intrinsic::umin)
1617 {F->getReturnType()});
1623 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1651 bool Expand =
false;
1652 if (Name.consume_front(
"abs."))
1655 Name ==
"i" || Name ==
"ll" || Name ==
"bf16" || Name ==
"bf16x2";
1656 else if (Name.consume_front(
"fabs."))
1658 Expand = Name ==
"f" || Name ==
"ftz.f" || Name ==
"d";
1659 else if (Name.consume_front(
"ex2.approx."))
1662 Name ==
"f" || Name ==
"ftz.f" || Name ==
"d" || Name ==
"f16x2";
1663 else if (Name.consume_front(
"atomic.load."))
1672 else if (Name.consume_front(
"bitcast."))
1675 Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" || Name ==
"d2ll";
1676 else if (Name.consume_front(
"rotate."))
1678 Expand = Name ==
"b32" || Name ==
"b64" || Name ==
"right.b64";
1679 else if (Name.consume_front(
"ptr.gen.to."))
1682 else if (Name.consume_front(
"ptr."))
1685 else if (Name.consume_front(
"ldg.global."))
1687 Expand = (Name.starts_with(
"i.") || Name.starts_with(
"f.") ||
1688 Name.starts_with(
"p."));
1691 .
Case(
"barrier0",
true)
1692 .
Case(
"barrier.n",
true)
1693 .
Case(
"barrier.sync.cnt",
true)
1694 .
Case(
"barrier.sync",
true)
1695 .
Case(
"barrier",
true)
1696 .
Case(
"bar.sync",
true)
1697 .
Case(
"barrier0.popc",
true)
1698 .
Case(
"barrier0.and",
true)
1699 .
Case(
"barrier0.or",
true)
1700 .
Case(
"clz.ll",
true)
1701 .
Case(
"popc.ll",
true)
1703 .
Case(
"swap.lo.hi.b64",
true)
1704 .
Case(
"tanh.approx.f32",
true)
1716 if (Name.starts_with(
"objectsize.")) {
1717 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1718 if (
F->arg_size() == 2 ||
F->arg_size() == 3) {
1721 Intrinsic::objectsize, Tys);
1728 if (Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1731 F->getParent(), Intrinsic::ptr_annotation,
1732 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1738 if (Name.consume_front(
"riscv.")) {
1741 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1742 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1743 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1744 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1747 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1760 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1761 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1770 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1771 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1772 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1773 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1778 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1787 if (Name ==
"clmul.i32" || Name ==
"clmul.i64") {
1789 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1798 if (Name ==
"stackprotectorcheck") {
1805 if (Name ==
"thread.pointer") {
1807 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
1813 if (Name ==
"var.annotation" &&
F->arg_size() == 4) {
1816 F->getParent(), Intrinsic::var_annotation,
1817 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1820 if (Name.consume_front(
"vector.splice")) {
1821 if (Name.starts_with(
".left") || Name.starts_with(
".right"))
1829 if (Name.consume_front(
"wasm.")) {
1832 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1833 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1834 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1839 F->getReturnType());
1843 if (Name.consume_front(
"dot.i8x16.i7x16.")) {
1845 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1847 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1866 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1876 std::string
Name =
F->getName().str();
1879 Name,
F->getParent());
1890 if (Result != std::nullopt) {
1903 bool CanUpgradeDebugIntrinsicsToRecords) {
1923 GV->
getName() ==
"llvm.global_dtors")) ||
1938 unsigned N =
Init->getNumOperands();
1939 std::vector<Constant *> NewCtors(
N);
1940 for (
unsigned i = 0; i !=
N; ++i) {
1943 Ctor->getAggregateElement(1),
1957 unsigned NumElts = ResultTy->getNumElements() * 8;
1961 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1971 for (
unsigned l = 0; l != NumElts; l += 16)
1972 for (
unsigned i = 0; i != 16; ++i) {
1973 unsigned Idx = NumElts + i - Shift;
1975 Idx -= NumElts - 16;
1976 Idxs[l + i] = Idx + l;
1979 Res = Builder.CreateShuffleVector(Res,
Op,
ArrayRef(Idxs, NumElts));
1983 return Builder.CreateBitCast(Res, ResultTy,
"cast");
1991 unsigned NumElts = ResultTy->getNumElements() * 8;
1995 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
2005 for (
unsigned l = 0; l != NumElts; l += 16)
2006 for (
unsigned i = 0; i != 16; ++i) {
2007 unsigned Idx = i + Shift;
2009 Idx += NumElts - 16;
2010 Idxs[l + i] = Idx + l;
2013 Res = Builder.CreateShuffleVector(
Op, Res,
ArrayRef(Idxs, NumElts));
2017 return Builder.CreateBitCast(Res, ResultTy,
"cast");
2025 Mask = Builder.CreateBitCast(Mask, MaskTy);
2031 for (
unsigned i = 0; i != NumElts; ++i)
2033 Mask = Builder.CreateShuffleVector(Mask, Mask,
ArrayRef(Indices, NumElts),
2044 if (
C->isAllOnesValue())
2049 return Builder.CreateSelect(Mask, Op0, Op1);
2056 if (
C->isAllOnesValue())
2060 Mask->getType()->getIntegerBitWidth());
2061 Mask = Builder.CreateBitCast(Mask, MaskTy);
2062 Mask = Builder.CreateExtractElement(Mask, (
uint64_t)0);
2063 return Builder.CreateSelect(Mask, Op0, Op1);
2076 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
2077 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
2082 ShiftVal &= (NumElts - 1);
2091 if (ShiftVal > 16) {
2099 for (
unsigned l = 0; l < NumElts; l += 16) {
2100 for (
unsigned i = 0; i != 16; ++i) {
2101 unsigned Idx = ShiftVal + i;
2102 if (!IsVALIGN && Idx >= 16)
2103 Idx += NumElts - 16;
2104 Indices[l + i] = Idx + l;
2109 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
2115 bool ZeroMask,
bool IndexForm) {
2118 unsigned EltWidth = Ty->getScalarSizeInBits();
2119 bool IsFloat = Ty->isFPOrFPVectorTy();
2121 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2122 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2123 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2124 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2125 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2126 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2127 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2128 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2129 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2130 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2131 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2132 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2133 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2134 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2135 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2136 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2137 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2138 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2139 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2140 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2141 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2142 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2143 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2144 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2145 else if (VecWidth == 128 && EltWidth == 16)
2146 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2147 else if (VecWidth == 256 && EltWidth == 16)
2148 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2149 else if (VecWidth == 512 && EltWidth == 16)
2150 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2151 else if (VecWidth == 128 && EltWidth == 8)
2152 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2153 else if (VecWidth == 256 && EltWidth == 8)
2154 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2155 else if (VecWidth == 512 && EltWidth == 8)
2156 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2167 Value *V = Builder.CreateIntrinsic(IID, Args);
2179 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2190 bool IsRotateRight) {
2200 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2201 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2204 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2205 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2250 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2255 bool IsShiftRight,
bool ZeroMask) {
2269 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2270 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2273 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2274 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2289 const Align Alignment =
2291 ?
Align(
Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2296 if (
C->isAllOnesValue())
2297 return Builder.CreateAlignedStore(
Data, Ptr, Alignment);
2302 return Builder.CreateMaskedStore(
Data, Ptr, Alignment, Mask);
2308 const Align Alignment =
2317 if (
C->isAllOnesValue())
2318 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2323 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2329 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2330 {Op0, Builder.getInt1(
false)});
2345 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2346 LHS = Builder.CreateShl(
LHS, ShiftAmt);
2347 LHS = Builder.CreateAShr(
LHS, ShiftAmt);
2348 RHS = Builder.CreateShl(
RHS, ShiftAmt);
2349 RHS = Builder.CreateAShr(
RHS, ShiftAmt);
2352 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2353 LHS = Builder.CreateAnd(
LHS, Mask);
2354 RHS = Builder.CreateAnd(
RHS, Mask);
2371 if (!
C || !
C->isAllOnesValue())
2372 Vec = Builder.CreateAnd(Vec,
getX86MaskVec(Builder, Mask, NumElts));
2377 for (
unsigned i = 0; i != NumElts; ++i)
2379 for (
unsigned i = NumElts; i != 8; ++i)
2380 Indices[i] = NumElts + i % NumElts;
2381 Vec = Builder.CreateShuffleVector(Vec,
2385 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2389 unsigned CC,
bool Signed) {
2397 }
else if (CC == 7) {
2433 Value* AndNode = Builder.CreateAnd(Mask,
APInt(8, 1));
2434 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2436 Value* Extract2 = Builder.CreateExtractElement(Src, (
uint64_t)0);
2437 Value*
Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2446 return Builder.CreateSExt(Mask, ReturnOp,
"vpmovm2");
2452 Name = Name.substr(12);
2457 if (Name.starts_with(
"max.p")) {
2458 if (VecWidth == 128 && EltWidth == 32)
2459 IID = Intrinsic::x86_sse_max_ps;
2460 else if (VecWidth == 128 && EltWidth == 64)
2461 IID = Intrinsic::x86_sse2_max_pd;
2462 else if (VecWidth == 256 && EltWidth == 32)
2463 IID = Intrinsic::x86_avx_max_ps_256;
2464 else if (VecWidth == 256 && EltWidth == 64)
2465 IID = Intrinsic::x86_avx_max_pd_256;
2468 }
else if (Name.starts_with(
"min.p")) {
2469 if (VecWidth == 128 && EltWidth == 32)
2470 IID = Intrinsic::x86_sse_min_ps;
2471 else if (VecWidth == 128 && EltWidth == 64)
2472 IID = Intrinsic::x86_sse2_min_pd;
2473 else if (VecWidth == 256 && EltWidth == 32)
2474 IID = Intrinsic::x86_avx_min_ps_256;
2475 else if (VecWidth == 256 && EltWidth == 64)
2476 IID = Intrinsic::x86_avx_min_pd_256;
2479 }
else if (Name.starts_with(
"pshuf.b.")) {
2480 if (VecWidth == 128)
2481 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2482 else if (VecWidth == 256)
2483 IID = Intrinsic::x86_avx2_pshuf_b;
2484 else if (VecWidth == 512)
2485 IID = Intrinsic::x86_avx512_pshuf_b_512;
2488 }
else if (Name.starts_with(
"pmul.hr.sw.")) {
2489 if (VecWidth == 128)
2490 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2491 else if (VecWidth == 256)
2492 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2493 else if (VecWidth == 512)
2494 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2497 }
else if (Name.starts_with(
"pmulh.w.")) {
2498 if (VecWidth == 128)
2499 IID = Intrinsic::x86_sse2_pmulh_w;
2500 else if (VecWidth == 256)
2501 IID = Intrinsic::x86_avx2_pmulh_w;
2502 else if (VecWidth == 512)
2503 IID = Intrinsic::x86_avx512_pmulh_w_512;
2506 }
else if (Name.starts_with(
"pmulhu.w.")) {
2507 if (VecWidth == 128)
2508 IID = Intrinsic::x86_sse2_pmulhu_w;
2509 else if (VecWidth == 256)
2510 IID = Intrinsic::x86_avx2_pmulhu_w;
2511 else if (VecWidth == 512)
2512 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2515 }
else if (Name.starts_with(
"pmaddw.d.")) {
2516 if (VecWidth == 128)
2517 IID = Intrinsic::x86_sse2_pmadd_wd;
2518 else if (VecWidth == 256)
2519 IID = Intrinsic::x86_avx2_pmadd_wd;
2520 else if (VecWidth == 512)
2521 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2524 }
else if (Name.starts_with(
"pmaddubs.w.")) {
2525 if (VecWidth == 128)
2526 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2527 else if (VecWidth == 256)
2528 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2529 else if (VecWidth == 512)
2530 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2533 }
else if (Name.starts_with(
"packsswb.")) {
2534 if (VecWidth == 128)
2535 IID = Intrinsic::x86_sse2_packsswb_128;
2536 else if (VecWidth == 256)
2537 IID = Intrinsic::x86_avx2_packsswb;
2538 else if (VecWidth == 512)
2539 IID = Intrinsic::x86_avx512_packsswb_512;
2542 }
else if (Name.starts_with(
"packssdw.")) {
2543 if (VecWidth == 128)
2544 IID = Intrinsic::x86_sse2_packssdw_128;
2545 else if (VecWidth == 256)
2546 IID = Intrinsic::x86_avx2_packssdw;
2547 else if (VecWidth == 512)
2548 IID = Intrinsic::x86_avx512_packssdw_512;
2551 }
else if (Name.starts_with(
"packuswb.")) {
2552 if (VecWidth == 128)
2553 IID = Intrinsic::x86_sse2_packuswb_128;
2554 else if (VecWidth == 256)
2555 IID = Intrinsic::x86_avx2_packuswb;
2556 else if (VecWidth == 512)
2557 IID = Intrinsic::x86_avx512_packuswb_512;
2560 }
else if (Name.starts_with(
"packusdw.")) {
2561 if (VecWidth == 128)
2562 IID = Intrinsic::x86_sse41_packusdw;
2563 else if (VecWidth == 256)
2564 IID = Intrinsic::x86_avx2_packusdw;
2565 else if (VecWidth == 512)
2566 IID = Intrinsic::x86_avx512_packusdw_512;
2569 }
else if (Name.starts_with(
"vpermilvar.")) {
2570 if (VecWidth == 128 && EltWidth == 32)
2571 IID = Intrinsic::x86_avx_vpermilvar_ps;
2572 else if (VecWidth == 128 && EltWidth == 64)
2573 IID = Intrinsic::x86_avx_vpermilvar_pd;
2574 else if (VecWidth == 256 && EltWidth == 32)
2575 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2576 else if (VecWidth == 256 && EltWidth == 64)
2577 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2578 else if (VecWidth == 512 && EltWidth == 32)
2579 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2580 else if (VecWidth == 512 && EltWidth == 64)
2581 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2584 }
else if (Name ==
"cvtpd2dq.256") {
2585 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2586 }
else if (Name ==
"cvtpd2ps.256") {
2587 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2588 }
else if (Name ==
"cvttpd2dq.256") {
2589 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2590 }
else if (Name ==
"cvttps2dq.128") {
2591 IID = Intrinsic::x86_sse2_cvttps2dq;
2592 }
else if (Name ==
"cvttps2dq.256") {
2593 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2594 }
else if (Name.starts_with(
"permvar.")) {
2596 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2597 IID = Intrinsic::x86_avx2_permps;
2598 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2599 IID = Intrinsic::x86_avx2_permd;
2600 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2601 IID = Intrinsic::x86_avx512_permvar_df_256;
2602 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2603 IID = Intrinsic::x86_avx512_permvar_di_256;
2604 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2605 IID = Intrinsic::x86_avx512_permvar_sf_512;
2606 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2607 IID = Intrinsic::x86_avx512_permvar_si_512;
2608 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2609 IID = Intrinsic::x86_avx512_permvar_df_512;
2610 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2611 IID = Intrinsic::x86_avx512_permvar_di_512;
2612 else if (VecWidth == 128 && EltWidth == 16)
2613 IID = Intrinsic::x86_avx512_permvar_hi_128;
2614 else if (VecWidth == 256 && EltWidth == 16)
2615 IID = Intrinsic::x86_avx512_permvar_hi_256;
2616 else if (VecWidth == 512 && EltWidth == 16)
2617 IID = Intrinsic::x86_avx512_permvar_hi_512;
2618 else if (VecWidth == 128 && EltWidth == 8)
2619 IID = Intrinsic::x86_avx512_permvar_qi_128;
2620 else if (VecWidth == 256 && EltWidth == 8)
2621 IID = Intrinsic::x86_avx512_permvar_qi_256;
2622 else if (VecWidth == 512 && EltWidth == 8)
2623 IID = Intrinsic::x86_avx512_permvar_qi_512;
2626 }
else if (Name.starts_with(
"dbpsadbw.")) {
2627 if (VecWidth == 128)
2628 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2629 else if (VecWidth == 256)
2630 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2631 else if (VecWidth == 512)
2632 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2635 }
else if (Name.starts_with(
"pmultishift.qb.")) {
2636 if (VecWidth == 128)
2637 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2638 else if (VecWidth == 256)
2639 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2640 else if (VecWidth == 512)
2641 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2644 }
else if (Name.starts_with(
"conflict.")) {
2645 if (Name[9] ==
'd' && VecWidth == 128)
2646 IID = Intrinsic::x86_avx512_conflict_d_128;
2647 else if (Name[9] ==
'd' && VecWidth == 256)
2648 IID = Intrinsic::x86_avx512_conflict_d_256;
2649 else if (Name[9] ==
'd' && VecWidth == 512)
2650 IID = Intrinsic::x86_avx512_conflict_d_512;
2651 else if (Name[9] ==
'q' && VecWidth == 128)
2652 IID = Intrinsic::x86_avx512_conflict_q_128;
2653 else if (Name[9] ==
'q' && VecWidth == 256)
2654 IID = Intrinsic::x86_avx512_conflict_q_256;
2655 else if (Name[9] ==
'q' && VecWidth == 512)
2656 IID = Intrinsic::x86_avx512_conflict_q_512;
2659 }
else if (Name.starts_with(
"pavg.")) {
2660 if (Name[5] ==
'b' && VecWidth == 128)
2661 IID = Intrinsic::x86_sse2_pavg_b;
2662 else if (Name[5] ==
'b' && VecWidth == 256)
2663 IID = Intrinsic::x86_avx2_pavg_b;
2664 else if (Name[5] ==
'b' && VecWidth == 512)
2665 IID = Intrinsic::x86_avx512_pavg_b_512;
2666 else if (Name[5] ==
'w' && VecWidth == 128)
2667 IID = Intrinsic::x86_sse2_pavg_w;
2668 else if (Name[5] ==
'w' && VecWidth == 256)
2669 IID = Intrinsic::x86_avx2_pavg_w;
2670 else if (Name[5] ==
'w' && VecWidth == 512)
2671 IID = Intrinsic::x86_avx512_pavg_w_512;
2680 Rep = Builder.CreateIntrinsic(IID, Args);
2691 if (AsmStr->find(
"mov\tfp") == 0 &&
2692 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2693 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2694 AsmStr->replace(Pos, 1,
";");
2700 Value *Rep =
nullptr;
2702 if (Name ==
"abs.i" || Name ==
"abs.ll") {
2704 Value *Neg = Builder.CreateNeg(Arg,
"neg");
2705 Value *Cmp = Builder.CreateICmpSGE(
2707 Rep = Builder.CreateSelect(Cmp, Arg, Neg,
"abs");
2708 }
else if (Name ==
"abs.bf16" || Name ==
"abs.bf16x2") {
2709 Type *Ty = (Name ==
"abs.bf16")
2713 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2714 Rep = Builder.CreateBitCast(Abs, CI->
getType());
2715 }
else if (Name ==
"fabs.f" || Name ==
"fabs.ftz.f" || Name ==
"fabs.d") {
2716 Intrinsic::ID IID = (Name ==
"fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2717 : Intrinsic::nvvm_fabs;
2718 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2719 }
else if (Name.consume_front(
"ex2.approx.")) {
2721 Intrinsic::ID IID = Name.starts_with(
"ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2722 : Intrinsic::nvvm_ex2_approx;
2723 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2724 }
else if (Name.starts_with(
"atomic.load.add.f32.p") ||
2725 Name.starts_with(
"atomic.load.add.f64.p")) {
2730 }
else if (Name.starts_with(
"atomic.load.inc.32.p") ||
2731 Name.starts_with(
"atomic.load.dec.32.p")) {
2736 Rep = Builder.CreateAtomicRMW(
Op, Ptr, Val,
MaybeAlign(),
2738 }
else if (Name ==
"clz.ll") {
2741 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->
getType()},
2742 {Arg, Builder.getFalse()},
2744 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(),
"ctlz.trunc");
2745 }
else if (Name ==
"popc.ll") {
2749 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->
getType()},
2750 Arg,
nullptr,
"ctpop");
2751 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(),
"ctpop.trunc");
2752 }
else if (Name ==
"h2f") {
2754 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
2755 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2756 }
else if (Name.consume_front(
"bitcast.") &&
2757 (Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" ||
2760 }
else if (Name ==
"rotate.b32") {
2763 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2764 {Arg, Arg, ShiftAmt});
2765 }
else if (Name ==
"rotate.b64") {
2769 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2770 {Arg, Arg, ZExtShiftAmt});
2771 }
else if (Name ==
"rotate.right.b64") {
2775 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2776 {Arg, Arg, ZExtShiftAmt});
2777 }
else if (Name ==
"swap.lo.hi.b64") {
2780 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2781 {Arg, Arg, Builder.getInt64(32)});
2782 }
else if ((Name.consume_front(
"ptr.gen.to.") &&
2785 Name.starts_with(
".to.gen"))) {
2787 }
else if (Name.consume_front(
"ldg.global")) {
2791 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2794 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2796 }
else if (Name ==
"tanh.approx.f32") {
2800 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->
getArgOperand(0),
2802 }
else if (Name ==
"barrier0" || Name ==
"barrier.n" || Name ==
"bar.sync") {
2804 Name.ends_with(
'0') ? Builder.getInt32(0) : CI->
getArgOperand(0);
2805 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2807 }
else if (Name ==
"barrier") {
2808 Rep = Builder.CreateIntrinsic(
2809 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2811 }
else if (Name ==
"barrier.sync") {
2812 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2814 }
else if (Name ==
"barrier.sync.cnt") {
2815 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2817 }
else if (Name ==
"barrier0.popc" || Name ==
"barrier0.and" ||
2818 Name ==
"barrier0.or") {
2820 C = Builder.CreateICmpNE(
C, Builder.getInt32(0));
2824 .
Case(
"barrier0.popc",
2825 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2826 .
Case(
"barrier0.and",
2827 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2828 .
Case(
"barrier0.or",
2829 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2830 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0),
C});
2831 Rep = Builder.CreateZExt(Bar, CI->
getType());
2835 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2845 ? Builder.CreateBitCast(Arg, NewType)
2848 Rep = Builder.CreateCall(NewFn, Args);
2849 if (
F->getReturnType()->isIntegerTy())
2850 Rep = Builder.CreateBitCast(Rep,
F->getReturnType());
2860 Value *Rep =
nullptr;
2862 if (Name.starts_with(
"sse4a.movnt.")) {
2874 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2877 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2878 }
else if (Name.starts_with(
"avx.movnt.") ||
2879 Name.starts_with(
"avx512.storent.")) {
2891 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2892 }
else if (Name ==
"sse2.storel.dq") {
2897 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2898 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2899 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2900 }
else if (Name.starts_with(
"sse.storeu.") ||
2901 Name.starts_with(
"sse2.storeu.") ||
2902 Name.starts_with(
"avx.storeu.")) {
2905 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
2906 }
else if (Name ==
"avx512.mask.store.ss") {
2910 }
else if (Name.starts_with(
"avx512.mask.store")) {
2912 bool Aligned = Name[17] !=
'u';
2915 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
2918 bool CmpEq = Name[9] ==
'e';
2921 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
2922 }
else if (Name.starts_with(
"avx512.broadcastm")) {
2929 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2930 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
2932 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
2933 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
2934 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
2935 }
else if (Name.starts_with(
"avx.sqrt.p") ||
2936 Name.starts_with(
"sse2.sqrt.p") ||
2937 Name.starts_with(
"sse.sqrt.p")) {
2938 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2939 {CI->getArgOperand(0)});
2940 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
2944 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
2945 : Intrinsic::x86_avx512_sqrt_pd_512;
2948 Rep = Builder.CreateIntrinsic(IID, Args);
2950 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2951 {CI->getArgOperand(0)});
2955 }
else if (Name.starts_with(
"avx512.ptestm") ||
2956 Name.starts_with(
"avx512.ptestnm")) {
2960 Rep = Builder.CreateAnd(Op0, Op1);
2966 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2968 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
2971 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
2974 }
else if (Name.starts_with(
"avx512.kunpck")) {
2979 for (
unsigned i = 0; i != NumElts; ++i)
2988 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
2989 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2990 }
else if (Name ==
"avx512.kand.w") {
2993 Rep = Builder.CreateAnd(
LHS,
RHS);
2994 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2995 }
else if (Name ==
"avx512.kandn.w") {
2998 LHS = Builder.CreateNot(
LHS);
2999 Rep = Builder.CreateAnd(
LHS,
RHS);
3000 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3001 }
else if (Name ==
"avx512.kor.w") {
3004 Rep = Builder.CreateOr(
LHS,
RHS);
3005 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3006 }
else if (Name ==
"avx512.kxor.w") {
3009 Rep = Builder.CreateXor(
LHS,
RHS);
3010 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3011 }
else if (Name ==
"avx512.kxnor.w") {
3014 LHS = Builder.CreateNot(
LHS);
3015 Rep = Builder.CreateXor(
LHS,
RHS);
3016 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3017 }
else if (Name ==
"avx512.knot.w") {
3019 Rep = Builder.CreateNot(Rep);
3020 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3021 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
3024 Rep = Builder.CreateOr(
LHS,
RHS);
3025 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3027 if (Name[14] ==
'c')
3031 Rep = Builder.CreateICmpEQ(Rep,
C);
3032 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3033 }
else if (Name ==
"sse.add.ss" || Name ==
"sse2.add.sd" ||
3034 Name ==
"sse.sub.ss" || Name ==
"sse2.sub.sd" ||
3035 Name ==
"sse.mul.ss" || Name ==
"sse2.mul.sd" ||
3036 Name ==
"sse.div.ss" || Name ==
"sse2.div.sd") {
3039 ConstantInt::get(I32Ty, 0));
3041 ConstantInt::get(I32Ty, 0));
3043 if (Name.contains(
".add."))
3044 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3045 else if (Name.contains(
".sub."))
3046 EltOp = Builder.CreateFSub(Elt0, Elt1);
3047 else if (Name.contains(
".mul."))
3048 EltOp = Builder.CreateFMul(Elt0, Elt1);
3050 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3051 Rep = Builder.CreateInsertElement(CI->
getArgOperand(0), EltOp,
3052 ConstantInt::get(I32Ty, 0));
3053 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
3055 bool CmpEq = Name[16] ==
'e';
3057 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
3066 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3069 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3072 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3079 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
3084 if (VecWidth == 128 && EltWidth == 32)
3085 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3086 else if (VecWidth == 256 && EltWidth == 32)
3087 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3088 else if (VecWidth == 512 && EltWidth == 32)
3089 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3090 else if (VecWidth == 128 && EltWidth == 64)
3091 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3092 else if (VecWidth == 256 && EltWidth == 64)
3093 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3094 else if (VecWidth == 512 && EltWidth == 64)
3095 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3102 }
else if (Name.starts_with(
"avx512.cmp.p")) {
3104 Type *OpTy = Args[0]->getType();
3108 if (VecWidth == 128 && EltWidth == 32)
3109 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3110 else if (VecWidth == 256 && EltWidth == 32)
3111 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3112 else if (VecWidth == 512 && EltWidth == 32)
3113 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3114 else if (VecWidth == 128 && EltWidth == 64)
3115 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3116 else if (VecWidth == 256 && EltWidth == 64)
3117 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3118 else if (VecWidth == 512 && EltWidth == 64)
3119 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3124 if (VecWidth == 512)
3126 Args.push_back(Mask);
3128 Rep = Builder.CreateIntrinsic(IID, Args);
3129 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
3133 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
3136 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
3137 Name.starts_with(
"avx512.cvtw2mask.") ||
3138 Name.starts_with(
"avx512.cvtd2mask.") ||
3139 Name.starts_with(
"avx512.cvtq2mask.")) {
3144 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
3145 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
3146 Name.starts_with(
"avx512.mask.pabs")) {
3148 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
3149 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
3150 Name.starts_with(
"avx512.mask.pmaxs")) {
3152 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
3153 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
3154 Name.starts_with(
"avx512.mask.pmaxu")) {
3156 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
3157 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
3158 Name.starts_with(
"avx512.mask.pmins")) {
3160 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
3161 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
3162 Name.starts_with(
"avx512.mask.pminu")) {
3164 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
3165 Name ==
"avx512.pmulu.dq.512" ||
3166 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
3168 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
3169 Name ==
"avx512.pmul.dq.512" ||
3170 Name.starts_with(
"avx512.mask.pmul.dq.")) {
3172 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
3173 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
3178 }
else if (Name ==
"avx512.cvtusi2sd") {
3183 }
else if (Name ==
"sse2.cvtss2sd") {
3185 Rep = Builder.CreateFPExt(
3188 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
3189 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
3190 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
3191 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
3192 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
3193 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
3194 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
3195 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
3196 Name ==
"avx512.mask.cvtqq2ps.256" ||
3197 Name ==
"avx512.mask.cvtqq2ps.512" ||
3198 Name ==
"avx512.mask.cvtuqq2ps.256" ||
3199 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
3200 Name ==
"avx.cvt.ps2.pd.256" ||
3201 Name ==
"avx512.mask.cvtps2pd.128" ||
3202 Name ==
"avx512.mask.cvtps2pd.256") {
3207 unsigned NumDstElts = DstTy->getNumElements();
3209 assert(NumDstElts == 2 &&
"Unexpected vector size");
3210 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
3213 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3214 bool IsUnsigned = Name.contains(
"cvtu");
3216 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
3220 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3221 : Intrinsic::x86_avx512_sitofp_round;
3222 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3225 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
3226 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3232 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3233 Name.starts_with(
"vcvtph2ps.")) {
3237 unsigned NumDstElts = DstTy->getNumElements();
3238 if (NumDstElts != SrcTy->getNumElements()) {
3239 assert(NumDstElts == 4 &&
"Unexpected vector size");
3240 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3242 Rep = Builder.CreateBitCast(
3244 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3248 }
else if (Name.starts_with(
"avx512.mask.load")) {
3250 bool Aligned = Name[16] !=
'u';
3253 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3256 ResultTy->getNumElements());
3258 Rep = Builder.CreateIntrinsic(
3259 Intrinsic::masked_expandload, ResultTy,
3261 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3267 Rep = Builder.CreateIntrinsic(
3268 Intrinsic::masked_compressstore, ResultTy,
3270 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3271 Name.starts_with(
"avx512.mask.expand.")) {
3275 ResultTy->getNumElements());
3277 bool IsCompress = Name[12] ==
'c';
3278 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3279 : Intrinsic::x86_avx512_mask_expand;
3280 Rep = Builder.CreateIntrinsic(
3282 }
else if (Name.starts_with(
"xop.vpcom")) {
3284 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3285 Name.ends_with(
"uq"))
3287 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3288 Name.ends_with(
"d") || Name.ends_with(
"q"))
3297 Name = Name.substr(9);
3298 if (Name.starts_with(
"lt"))
3300 else if (Name.starts_with(
"le"))
3302 else if (Name.starts_with(
"gt"))
3304 else if (Name.starts_with(
"ge"))
3306 else if (Name.starts_with(
"eq"))
3308 else if (Name.starts_with(
"ne"))
3310 else if (Name.starts_with(
"false"))
3312 else if (Name.starts_with(
"true"))
3319 }
else if (Name.starts_with(
"xop.vpcmov")) {
3321 Value *NotSel = Builder.CreateNot(Sel);
3324 Rep = Builder.CreateOr(Sel0, Sel1);
3325 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3326 Name.starts_with(
"avx512.mask.prol")) {
3328 }
else if (Name.starts_with(
"avx512.pror") ||
3329 Name.starts_with(
"avx512.mask.pror")) {
3331 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3332 Name.starts_with(
"avx512.mask.vpshld") ||
3333 Name.starts_with(
"avx512.maskz.vpshld")) {
3334 bool ZeroMask = Name[11] ==
'z';
3336 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3337 Name.starts_with(
"avx512.mask.vpshrd") ||
3338 Name.starts_with(
"avx512.maskz.vpshrd")) {
3339 bool ZeroMask = Name[11] ==
'z';
3341 }
else if (Name ==
"sse42.crc32.64.8") {
3344 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3346 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3347 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3348 Name.starts_with(
"avx512.vbroadcast.s")) {
3351 Type *EltTy = VecTy->getElementType();
3352 unsigned EltNum = VecTy->getNumElements();
3356 for (
unsigned I = 0;
I < EltNum; ++
I)
3357 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3358 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3359 Name.starts_with(
"sse41.pmovzx") ||
3360 Name.starts_with(
"avx2.pmovsx") ||
3361 Name.starts_with(
"avx2.pmovzx") ||
3362 Name.starts_with(
"avx512.mask.pmovsx") ||
3363 Name.starts_with(
"avx512.mask.pmovzx")) {
3365 unsigned NumDstElts = DstTy->getNumElements();
3369 for (
unsigned i = 0; i != NumDstElts; ++i)
3374 bool DoSext = Name.contains(
"pmovsx");
3376 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3381 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3382 Name ==
"avx512.mask.pmov.qd.512" ||
3383 Name ==
"avx512.mask.pmov.wb.256" ||
3384 Name ==
"avx512.mask.pmov.wb.512") {
3389 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3390 Name ==
"avx2.vbroadcasti128") {
3396 if (NumSrcElts == 2)
3397 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3399 Rep = Builder.CreateShuffleVector(Load,
3401 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3402 Name.starts_with(
"avx512.mask.shuf.f")) {
3407 unsigned ControlBitsMask = NumLanes - 1;
3408 unsigned NumControlBits = NumLanes / 2;
3411 for (
unsigned l = 0; l != NumLanes; ++l) {
3412 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3414 if (l >= NumLanes / 2)
3415 LaneMask += NumLanes;
3416 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3417 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3423 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3424 Name.starts_with(
"avx512.mask.broadcasti")) {
3427 unsigned NumDstElts =
3431 for (
unsigned i = 0; i != NumDstElts; ++i)
3432 ShuffleMask[i] = i % NumSrcElts;
3438 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3439 Name.starts_with(
"avx2.vbroadcast") ||
3440 Name.starts_with(
"avx512.pbroadcast") ||
3441 Name.starts_with(
"avx512.mask.broadcast.s")) {
3448 Rep = Builder.CreateShuffleVector(
Op, M);
3453 }
else if (Name.starts_with(
"sse2.padds.") ||
3454 Name.starts_with(
"avx2.padds.") ||
3455 Name.starts_with(
"avx512.padds.") ||
3456 Name.starts_with(
"avx512.mask.padds.")) {
3458 }
else if (Name.starts_with(
"sse2.psubs.") ||
3459 Name.starts_with(
"avx2.psubs.") ||
3460 Name.starts_with(
"avx512.psubs.") ||
3461 Name.starts_with(
"avx512.mask.psubs.")) {
3463 }
else if (Name.starts_with(
"sse2.paddus.") ||
3464 Name.starts_with(
"avx2.paddus.") ||
3465 Name.starts_with(
"avx512.mask.paddus.")) {
3467 }
else if (Name.starts_with(
"sse2.psubus.") ||
3468 Name.starts_with(
"avx2.psubus.") ||
3469 Name.starts_with(
"avx512.mask.psubus.")) {
3471 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3476 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3480 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3485 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3490 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3491 Name ==
"avx512.psll.dq.512") {
3495 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3496 Name ==
"avx512.psrl.dq.512") {
3500 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3501 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3502 Name.starts_with(
"avx2.pblendd.")) {
3507 unsigned NumElts = VecTy->getNumElements();
3510 for (
unsigned i = 0; i != NumElts; ++i)
3511 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3513 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3514 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3515 Name ==
"avx2.vinserti128" ||
3516 Name.starts_with(
"avx512.mask.insert")) {
3520 unsigned DstNumElts =
3522 unsigned SrcNumElts =
3524 unsigned Scale = DstNumElts / SrcNumElts;
3531 for (
unsigned i = 0; i != SrcNumElts; ++i)
3533 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3534 Idxs[i] = SrcNumElts;
3535 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3549 for (
unsigned i = 0; i != DstNumElts; ++i)
3552 for (
unsigned i = 0; i != SrcNumElts; ++i)
3553 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3554 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3560 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3561 Name ==
"avx2.vextracti128" ||
3562 Name.starts_with(
"avx512.mask.vextract")) {
3565 unsigned DstNumElts =
3567 unsigned SrcNumElts =
3569 unsigned Scale = SrcNumElts / DstNumElts;
3576 for (
unsigned i = 0; i != DstNumElts; ++i) {
3577 Idxs[i] = i + (Imm * DstNumElts);
3579 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3585 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3586 Name.starts_with(
"avx512.mask.perm.di.")) {
3590 unsigned NumElts = VecTy->getNumElements();
3593 for (
unsigned i = 0; i != NumElts; ++i)
3594 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3596 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3601 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3613 unsigned HalfSize = NumElts / 2;
3625 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3626 for (
unsigned i = 0; i < HalfSize; ++i)
3627 ShuffleMask[i] = StartIndex + i;
3630 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3631 for (
unsigned i = 0; i < HalfSize; ++i)
3632 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3634 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3636 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3637 Name.starts_with(
"avx512.mask.vpermil.p") ||
3638 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3642 unsigned NumElts = VecTy->getNumElements();
3644 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3645 unsigned IdxMask = ((1 << IdxSize) - 1);
3651 for (
unsigned i = 0; i != NumElts; ++i)
3652 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3654 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3659 }
else if (Name ==
"sse2.pshufl.w" ||
3660 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3665 if (Name ==
"sse2.pshufl.w" && NumElts % 8 != 0)
3669 for (
unsigned l = 0; l != NumElts; l += 8) {
3670 for (
unsigned i = 0; i != 4; ++i)
3671 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3672 for (
unsigned i = 4; i != 8; ++i)
3673 Idxs[i + l] = i + l;
3676 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3681 }
else if (Name ==
"sse2.pshufh.w" ||
3682 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3687 if (Name ==
"sse2.pshufh.w" && NumElts % 8 != 0)
3691 for (
unsigned l = 0; l != NumElts; l += 8) {
3692 for (
unsigned i = 0; i != 4; ++i)
3693 Idxs[i + l] = i + l;
3694 for (
unsigned i = 0; i != 4; ++i)
3695 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3698 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3703 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3710 unsigned HalfLaneElts = NumLaneElts / 2;
3713 for (
unsigned i = 0; i != NumElts; ++i) {
3715 Idxs[i] = i - (i % NumLaneElts);
3717 if ((i % NumLaneElts) >= HalfLaneElts)
3721 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3724 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3728 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3729 Name.starts_with(
"avx512.mask.movshdup") ||
3730 Name.starts_with(
"avx512.mask.movsldup")) {
3736 if (Name.starts_with(
"avx512.mask.movshdup."))
3740 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3741 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3742 Idxs[i + l + 0] = i + l +
Offset;
3743 Idxs[i + l + 1] = i + l +
Offset;
3746 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3750 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3751 Name.starts_with(
"avx512.mask.unpckl.")) {
3758 for (
int l = 0; l != NumElts; l += NumLaneElts)
3759 for (
int i = 0; i != NumLaneElts; ++i)
3760 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3762 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3766 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3767 Name.starts_with(
"avx512.mask.unpckh.")) {
3774 for (
int l = 0; l != NumElts; l += NumLaneElts)
3775 for (
int i = 0; i != NumLaneElts; ++i)
3776 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3778 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3782 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3783 Name.starts_with(
"avx512.mask.pand.")) {
3786 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3788 Rep = Builder.CreateBitCast(Rep, FTy);
3791 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3792 Name.starts_with(
"avx512.mask.pandn.")) {
3795 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3796 Rep = Builder.CreateAnd(Rep,
3798 Rep = Builder.CreateBitCast(Rep, FTy);
3801 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3802 Name.starts_with(
"avx512.mask.por.")) {
3805 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3807 Rep = Builder.CreateBitCast(Rep, FTy);
3810 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3811 Name.starts_with(
"avx512.mask.pxor.")) {
3814 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3816 Rep = Builder.CreateBitCast(Rep, FTy);
3819 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3823 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3827 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3831 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3832 if (Name.ends_with(
".512")) {
3834 if (Name[17] ==
's')
3835 IID = Intrinsic::x86_avx512_add_ps_512;
3837 IID = Intrinsic::x86_avx512_add_pd_512;
3839 Rep = Builder.CreateIntrinsic(
3847 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3848 if (Name.ends_with(
".512")) {
3850 if (Name[17] ==
's')
3851 IID = Intrinsic::x86_avx512_div_ps_512;
3853 IID = Intrinsic::x86_avx512_div_pd_512;
3855 Rep = Builder.CreateIntrinsic(
3863 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3864 if (Name.ends_with(
".512")) {
3866 if (Name[17] ==
's')
3867 IID = Intrinsic::x86_avx512_mul_ps_512;
3869 IID = Intrinsic::x86_avx512_mul_pd_512;
3871 Rep = Builder.CreateIntrinsic(
3879 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3880 if (Name.ends_with(
".512")) {
3882 if (Name[17] ==
's')
3883 IID = Intrinsic::x86_avx512_sub_ps_512;
3885 IID = Intrinsic::x86_avx512_sub_pd_512;
3887 Rep = Builder.CreateIntrinsic(
3895 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3896 Name.starts_with(
"avx512.mask.min.p")) &&
3897 Name.drop_front(18) ==
".512") {
3898 bool IsDouble = Name[17] ==
'd';
3899 bool IsMin = Name[13] ==
'i';
3901 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3902 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3905 Rep = Builder.CreateIntrinsic(
3910 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
3912 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
3913 {CI->getArgOperand(0), Builder.getInt1(false)});
3916 }
else if (Name.starts_with(
"avx512.mask.psll")) {
3917 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3918 bool IsVariable = Name[16] ==
'v';
3919 char Size = Name[16] ==
'.' ? Name[17]
3920 : Name[17] ==
'.' ? Name[18]
3921 : Name[18] ==
'.' ? Name[19]
3925 if (IsVariable && Name[17] !=
'.') {
3926 if (
Size ==
'd' && Name[17] ==
'2')
3927 IID = Intrinsic::x86_avx2_psllv_q;
3928 else if (
Size ==
'd' && Name[17] ==
'4')
3929 IID = Intrinsic::x86_avx2_psllv_q_256;
3930 else if (
Size ==
's' && Name[17] ==
'4')
3931 IID = Intrinsic::x86_avx2_psllv_d;
3932 else if (
Size ==
's' && Name[17] ==
'8')
3933 IID = Intrinsic::x86_avx2_psllv_d_256;
3934 else if (
Size ==
'h' && Name[17] ==
'8')
3935 IID = Intrinsic::x86_avx512_psllv_w_128;
3936 else if (
Size ==
'h' && Name[17] ==
'1')
3937 IID = Intrinsic::x86_avx512_psllv_w_256;
3938 else if (Name[17] ==
'3' && Name[18] ==
'2')
3939 IID = Intrinsic::x86_avx512_psllv_w_512;
3942 }
else if (Name.ends_with(
".128")) {
3944 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3945 : Intrinsic::x86_sse2_psll_d;
3946 else if (
Size ==
'q')
3947 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3948 : Intrinsic::x86_sse2_psll_q;
3949 else if (
Size ==
'w')
3950 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3951 : Intrinsic::x86_sse2_psll_w;
3954 }
else if (Name.ends_with(
".256")) {
3956 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3957 : Intrinsic::x86_avx2_psll_d;
3958 else if (
Size ==
'q')
3959 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3960 : Intrinsic::x86_avx2_psll_q;
3961 else if (
Size ==
'w')
3962 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3963 : Intrinsic::x86_avx2_psll_w;
3968 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3969 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3970 : Intrinsic::x86_avx512_psll_d_512;
3971 else if (
Size ==
'q')
3972 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3973 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3974 : Intrinsic::x86_avx512_psll_q_512;
3975 else if (
Size ==
'w')
3976 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3977 : Intrinsic::x86_avx512_psll_w_512;
3983 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
3984 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3985 bool IsVariable = Name[16] ==
'v';
3986 char Size = Name[16] ==
'.' ? Name[17]
3987 : Name[17] ==
'.' ? Name[18]
3988 : Name[18] ==
'.' ? Name[19]
3992 if (IsVariable && Name[17] !=
'.') {
3993 if (
Size ==
'd' && Name[17] ==
'2')
3994 IID = Intrinsic::x86_avx2_psrlv_q;
3995 else if (
Size ==
'd' && Name[17] ==
'4')
3996 IID = Intrinsic::x86_avx2_psrlv_q_256;
3997 else if (
Size ==
's' && Name[17] ==
'4')
3998 IID = Intrinsic::x86_avx2_psrlv_d;
3999 else if (
Size ==
's' && Name[17] ==
'8')
4000 IID = Intrinsic::x86_avx2_psrlv_d_256;
4001 else if (
Size ==
'h' && Name[17] ==
'8')
4002 IID = Intrinsic::x86_avx512_psrlv_w_128;
4003 else if (
Size ==
'h' && Name[17] ==
'1')
4004 IID = Intrinsic::x86_avx512_psrlv_w_256;
4005 else if (Name[17] ==
'3' && Name[18] ==
'2')
4006 IID = Intrinsic::x86_avx512_psrlv_w_512;
4009 }
else if (Name.ends_with(
".128")) {
4011 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4012 : Intrinsic::x86_sse2_psrl_d;
4013 else if (
Size ==
'q')
4014 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4015 : Intrinsic::x86_sse2_psrl_q;
4016 else if (
Size ==
'w')
4017 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4018 : Intrinsic::x86_sse2_psrl_w;
4021 }
else if (Name.ends_with(
".256")) {
4023 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4024 : Intrinsic::x86_avx2_psrl_d;
4025 else if (
Size ==
'q')
4026 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4027 : Intrinsic::x86_avx2_psrl_q;
4028 else if (
Size ==
'w')
4029 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4030 : Intrinsic::x86_avx2_psrl_w;
4035 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4036 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4037 : Intrinsic::x86_avx512_psrl_d_512;
4038 else if (
Size ==
'q')
4039 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4040 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4041 : Intrinsic::x86_avx512_psrl_q_512;
4042 else if (
Size ==
'w')
4043 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4044 : Intrinsic::x86_avx512_psrl_w_512;
4050 }
else if (Name.starts_with(
"avx512.mask.psra")) {
4051 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4052 bool IsVariable = Name[16] ==
'v';
4053 char Size = Name[16] ==
'.' ? Name[17]
4054 : Name[17] ==
'.' ? Name[18]
4055 : Name[18] ==
'.' ? Name[19]
4059 if (IsVariable && Name[17] !=
'.') {
4060 if (
Size ==
's' && Name[17] ==
'4')
4061 IID = Intrinsic::x86_avx2_psrav_d;
4062 else if (
Size ==
's' && Name[17] ==
'8')
4063 IID = Intrinsic::x86_avx2_psrav_d_256;
4064 else if (
Size ==
'h' && Name[17] ==
'8')
4065 IID = Intrinsic::x86_avx512_psrav_w_128;
4066 else if (
Size ==
'h' && Name[17] ==
'1')
4067 IID = Intrinsic::x86_avx512_psrav_w_256;
4068 else if (Name[17] ==
'3' && Name[18] ==
'2')
4069 IID = Intrinsic::x86_avx512_psrav_w_512;
4072 }
else if (Name.ends_with(
".128")) {
4074 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4075 : Intrinsic::x86_sse2_psra_d;
4076 else if (
Size ==
'q')
4077 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4078 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4079 : Intrinsic::x86_avx512_psra_q_128;
4080 else if (
Size ==
'w')
4081 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4082 : Intrinsic::x86_sse2_psra_w;
4085 }
else if (Name.ends_with(
".256")) {
4087 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4088 : Intrinsic::x86_avx2_psra_d;
4089 else if (
Size ==
'q')
4090 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4091 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4092 : Intrinsic::x86_avx512_psra_q_256;
4093 else if (
Size ==
'w')
4094 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4095 : Intrinsic::x86_avx2_psra_w;
4100 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4101 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4102 : Intrinsic::x86_avx512_psra_d_512;
4103 else if (
Size ==
'q')
4104 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4105 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4106 : Intrinsic::x86_avx512_psra_q_512;
4107 else if (
Size ==
'w')
4108 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4109 : Intrinsic::x86_avx512_psra_w_512;
4115 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
4117 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
4119 }
else if (Name.ends_with(
".movntdqa")) {
4123 LoadInst *LI = Builder.CreateAlignedLoad(
4128 }
else if (Name.starts_with(
"fma.vfmadd.") ||
4129 Name.starts_with(
"fma.vfmsub.") ||
4130 Name.starts_with(
"fma.vfnmadd.") ||
4131 Name.starts_with(
"fma.vfnmsub.")) {
4132 bool NegMul = Name[6] ==
'n';
4133 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
4134 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
4145 if (NegMul && !IsScalar)
4146 Ops[0] = Builder.CreateFNeg(
Ops[0]);
4147 if (NegMul && IsScalar)
4148 Ops[1] = Builder.CreateFNeg(
Ops[1]);
4150 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4152 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4156 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
4164 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4168 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
4169 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
4170 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
4171 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
4172 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
4173 bool IsMask3 = Name[11] ==
'3';
4174 bool IsMaskZ = Name[11] ==
'z';
4176 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4177 bool NegMul = Name[2] ==
'n';
4178 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4184 if (NegMul && (IsMask3 || IsMaskZ))
4185 A = Builder.CreateFNeg(
A);
4186 if (NegMul && !(IsMask3 || IsMaskZ))
4187 B = Builder.CreateFNeg(
B);
4189 C = Builder.CreateFNeg(
C);
4191 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
4192 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
4193 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
4200 if (Name.back() ==
'd')
4201 IID = Intrinsic::x86_avx512_vfmadd_f64;
4203 IID = Intrinsic::x86_avx512_vfmadd_f32;
4204 Rep = Builder.CreateIntrinsic(IID,
Ops);
4206 Rep = Builder.CreateFMA(
A,
B,
C);
4215 if (NegAcc && IsMask3)
4220 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
4222 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
4223 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
4224 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
4225 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
4226 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
4227 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
4228 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
4229 bool IsMask3 = Name[11] ==
'3';
4230 bool IsMaskZ = Name[11] ==
'z';
4232 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4233 bool NegMul = Name[2] ==
'n';
4234 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4240 if (NegMul && (IsMask3 || IsMaskZ))
4241 A = Builder.CreateFNeg(
A);
4242 if (NegMul && !(IsMask3 || IsMaskZ))
4243 B = Builder.CreateFNeg(
B);
4245 C = Builder.CreateFNeg(
C);
4252 if (Name[Name.size() - 5] ==
's')
4253 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4255 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4259 Rep = Builder.CreateFMA(
A,
B,
C);
4267 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4271 if (VecWidth == 128 && EltWidth == 32)
4272 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4273 else if (VecWidth == 256 && EltWidth == 32)
4274 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4275 else if (VecWidth == 128 && EltWidth == 64)
4276 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4277 else if (VecWidth == 256 && EltWidth == 64)
4278 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4284 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4285 Rep = Builder.CreateIntrinsic(IID,
Ops);
4286 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4287 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4288 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4289 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4290 bool IsMask3 = Name[11] ==
'3';
4291 bool IsMaskZ = Name[11] ==
'z';
4293 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4294 bool IsSubAdd = Name[3] ==
's';
4298 if (Name[Name.size() - 5] ==
's')
4299 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4301 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4306 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4308 Rep = Builder.CreateIntrinsic(IID,
Ops);
4317 Value *Odd = Builder.CreateCall(FMA,
Ops);
4318 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4319 Value *Even = Builder.CreateCall(FMA,
Ops);
4325 for (
int i = 0; i != NumElts; ++i)
4326 Idxs[i] = i + (i % 2) * NumElts;
4328 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4336 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4337 Name.starts_with(
"avx512.maskz.pternlog.")) {
4338 bool ZeroMask = Name[11] ==
'z';
4342 if (VecWidth == 128 && EltWidth == 32)
4343 IID = Intrinsic::x86_avx512_pternlog_d_128;
4344 else if (VecWidth == 256 && EltWidth == 32)
4345 IID = Intrinsic::x86_avx512_pternlog_d_256;
4346 else if (VecWidth == 512 && EltWidth == 32)
4347 IID = Intrinsic::x86_avx512_pternlog_d_512;
4348 else if (VecWidth == 128 && EltWidth == 64)
4349 IID = Intrinsic::x86_avx512_pternlog_q_128;
4350 else if (VecWidth == 256 && EltWidth == 64)
4351 IID = Intrinsic::x86_avx512_pternlog_q_256;
4352 else if (VecWidth == 512 && EltWidth == 64)
4353 IID = Intrinsic::x86_avx512_pternlog_q_512;
4359 Rep = Builder.CreateIntrinsic(IID, Args);
4363 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4364 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4365 bool ZeroMask = Name[11] ==
'z';
4366 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4369 if (VecWidth == 128 && !
High)
4370 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4371 else if (VecWidth == 256 && !
High)
4372 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4373 else if (VecWidth == 512 && !
High)
4374 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4375 else if (VecWidth == 128 &&
High)
4376 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4377 else if (VecWidth == 256 &&
High)
4378 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4379 else if (VecWidth == 512 &&
High)
4380 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4386 Rep = Builder.CreateIntrinsic(IID, Args);
4390 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4391 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4392 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4393 bool ZeroMask = Name[11] ==
'z';
4394 bool IndexForm = Name[17] ==
'i';
4396 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4397 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4398 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4399 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4400 bool ZeroMask = Name[11] ==
'z';
4401 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4404 if (VecWidth == 128 && !IsSaturating)
4405 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4406 else if (VecWidth == 256 && !IsSaturating)
4407 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4408 else if (VecWidth == 512 && !IsSaturating)
4409 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4410 else if (VecWidth == 128 && IsSaturating)
4411 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4412 else if (VecWidth == 256 && IsSaturating)
4413 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4414 else if (VecWidth == 512 && IsSaturating)
4415 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4425 if (Args[1]->
getType()->isVectorTy() &&
4428 ->isIntegerTy(32) &&
4429 Args[2]->
getType()->isVectorTy() &&
4432 ->isIntegerTy(32)) {
4433 Type *NewArgType =
nullptr;
4434 if (VecWidth == 128)
4436 else if (VecWidth == 256)
4438 else if (VecWidth == 512)
4444 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4445 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4448 Rep = Builder.CreateIntrinsic(IID, Args);
4452 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4453 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4454 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4455 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4456 bool ZeroMask = Name[11] ==
'z';
4457 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4460 if (VecWidth == 128 && !IsSaturating)
4461 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4462 else if (VecWidth == 256 && !IsSaturating)
4463 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4464 else if (VecWidth == 512 && !IsSaturating)
4465 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4466 else if (VecWidth == 128 && IsSaturating)
4467 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4468 else if (VecWidth == 256 && IsSaturating)
4469 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4470 else if (VecWidth == 512 && IsSaturating)
4471 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4481 if (Args[1]->
getType()->isVectorTy() &&
4484 ->isIntegerTy(32) &&
4485 Args[2]->
getType()->isVectorTy() &&
4488 ->isIntegerTy(32)) {
4489 Type *NewArgType =
nullptr;
4490 if (VecWidth == 128)
4492 else if (VecWidth == 256)
4494 else if (VecWidth == 512)
4500 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4501 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4504 Rep = Builder.CreateIntrinsic(IID, Args);
4508 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4509 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4510 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4512 if (Name[0] ==
'a' && Name.back() ==
'2')
4513 IID = Intrinsic::x86_addcarry_32;
4514 else if (Name[0] ==
'a' && Name.back() ==
'4')
4515 IID = Intrinsic::x86_addcarry_64;
4516 else if (Name[0] ==
's' && Name.back() ==
'2')
4517 IID = Intrinsic::x86_subborrow_32;
4518 else if (Name[0] ==
's' && Name.back() ==
'4')
4519 IID = Intrinsic::x86_subborrow_64;
4526 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4529 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4532 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4536 }
else if (Name.starts_with(
"avx512.mask.") &&
4547 if (Name.starts_with(
"neon.bfcvt")) {
4548 if (Name.starts_with(
"neon.bfcvtn2")) {
4550 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4552 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4553 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4556 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4557 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4559 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4563 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4564 return Builder.CreateShuffleVector(
4567 return Builder.CreateFPTrunc(CI->
getOperand(0),
4570 }
else if (Name.starts_with(
"sve.fcvt")) {
4573 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4574 .
Case(
"sve.fcvtnt.bf16f32",
4575 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4587 if (Args[1]->
getType() != BadPredTy)
4590 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4591 BadPredTy, Args[1]);
4592 Args[1] = Builder.CreateIntrinsic(
4593 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4595 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4604 if (Name ==
"mve.vctp64.old") {
4607 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4610 Value *C1 = Builder.CreateIntrinsic(
4611 Intrinsic::arm_mve_pred_v2i,
4613 return Builder.CreateIntrinsic(
4614 Intrinsic::arm_mve_pred_i2v,
4616 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4617 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4618 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4619 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4621 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4622 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4623 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4624 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4626 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4627 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4628 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4629 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4630 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4631 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4632 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4633 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4634 std::vector<Type *> Tys;
4638 case Intrinsic::arm_mve_mull_int_predicated:
4639 case Intrinsic::arm_mve_vqdmull_predicated:
4640 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4643 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4644 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4645 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4649 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4653 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4657 case Intrinsic::arm_cde_vcx1q_predicated:
4658 case Intrinsic::arm_cde_vcx1qa_predicated:
4659 case Intrinsic::arm_cde_vcx2q_predicated:
4660 case Intrinsic::arm_cde_vcx2qa_predicated:
4661 case Intrinsic::arm_cde_vcx3q_predicated:
4662 case Intrinsic::arm_cde_vcx3qa_predicated:
4669 std::vector<Value *>
Ops;
4671 Type *Ty =
Op->getType();
4672 if (Ty->getScalarSizeInBits() == 1) {
4673 Value *C1 = Builder.CreateIntrinsic(
4674 Intrinsic::arm_mve_pred_v2i,
4676 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4681 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4696 auto UpgradeLegacyWMMAIUIntrinsicCall =
4701 Args.push_back(Builder.getFalse());
4705 F->getParent(),
F->getIntrinsicID(), OverloadTys);
4712 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4717 NewCall->copyMetadata(*CI);
4721 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4722 assert(CI->
arg_size() == 7 &&
"Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4723 "intrinsic should have 7 arguments");
4726 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2});
4728 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4729 assert(CI->
arg_size() == 8 &&
"Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4730 "intrinsic should have 8 arguments");
4735 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2, T3, T4});
4738 switch (
F->getIntrinsicID()) {
4741 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4742 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4743 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4744 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4745 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4746 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4761 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4764 F->getParent(),
F->getIntrinsicID(), Overloads);
4769 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4774 NewCall->copyMetadata(*CI);
4775 NewCall->takeName(CI);
4797 if (NumOperands < 3)
4810 bool IsVolatile =
false;
4814 if (NumOperands > 3)
4819 if (NumOperands > 5) {
4821 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4835 if (VT->getElementType()->isIntegerTy(16)) {
4838 Val = Builder.CreateBitCast(Val, AsBF16);
4846 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4848 unsigned AddrSpace = PtrTy->getAddressSpace();
4851 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4853 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4858 MDNode *RangeNotPrivate =
4861 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4867 return Builder.CreateBitCast(RMW, RetTy);
4888 return MAV->getMetadata();
4895 return I->getDebugLoc().getAsMDNode();
4903 if (Name ==
"label") {
4906 }
else if (Name ==
"assign") {
4913 }
else if (Name ==
"declare") {
4918 }
else if (Name ==
"addr") {
4928 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr,
4930 }
else if (Name ==
"value") {
4933 unsigned ExprOp = 2;
4947 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
4955 int64_t OffsetVal =
Offset->getSExtValue();
4956 return Builder.CreateIntrinsic(OffsetVal >= 0
4957 ? Intrinsic::vector_splice_left
4958 : Intrinsic::vector_splice_right,
4960 {CI->getArgOperand(0), CI->getArgOperand(1),
4961 Builder.getInt32(std::abs(OffsetVal))});
4966 if (Name.starts_with(
"to.fp16")) {
4968 Builder.CreateFPTrunc(CI->
getArgOperand(0), Builder.getHalfTy());
4969 return Builder.CreateBitCast(Cast, CI->
getType());
4972 if (Name.starts_with(
"from.fp16")) {
4974 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
4975 return Builder.CreateFPExt(Cast, CI->
getType());
5000 if (!Name.consume_front(
"llvm."))
5003 bool IsX86 = Name.consume_front(
"x86.");
5004 bool IsNVVM = Name.consume_front(
"nvvm.");
5005 bool IsAArch64 = Name.consume_front(
"aarch64.");
5006 bool IsARM = Name.consume_front(
"arm.");
5007 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
5008 bool IsDbg = Name.consume_front(
"dbg.");
5010 (Name.consume_front(
"experimental.vector.splice") ||
5011 Name.consume_front(
"vector.splice")) &&
5012 !(Name.starts_with(
".left") || Name.starts_with(
".right"));
5013 Value *Rep =
nullptr;
5015 if (!IsX86 && Name ==
"stackprotectorcheck") {
5017 }
else if (IsNVVM) {
5021 }
else if (IsAArch64) {
5025 }
else if (IsAMDGCN) {
5029 }
else if (IsOldSplice) {
5031 }
else if (Name.consume_front(
"convert.")) {
5043 const auto &DefaultCase = [&]() ->
void {
5051 "Unknown function for CallBase upgrade and isn't just a name change");
5059 "Return type must have changed");
5060 assert(OldST->getNumElements() ==
5062 "Must have same number of elements");
5065 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5068 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5069 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5070 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5089 case Intrinsic::arm_neon_vst1:
5090 case Intrinsic::arm_neon_vst2:
5091 case Intrinsic::arm_neon_vst3:
5092 case Intrinsic::arm_neon_vst4:
5093 case Intrinsic::arm_neon_vst2lane:
5094 case Intrinsic::arm_neon_vst3lane:
5095 case Intrinsic::arm_neon_vst4lane: {
5097 NewCall = Builder.CreateCall(NewFn, Args);
5100 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5101 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5102 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5107 NewCall = Builder.CreateCall(NewFn, Args);
5110 case Intrinsic::aarch64_sve_ld3_sret:
5111 case Intrinsic::aarch64_sve_ld4_sret:
5112 case Intrinsic::aarch64_sve_ld2_sret: {
5120 Name = Name.substr(5);
5127 unsigned MinElts = RetTy->getMinNumElements() /
N;
5129 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5131 for (
unsigned I = 0;
I <
N;
I++) {
5132 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
5133 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
5139 case Intrinsic::coro_end: {
5142 NewCall = Builder.CreateCall(NewFn, Args);
5146 case Intrinsic::vector_extract: {
5148 Name = Name.substr(5);
5149 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
5154 unsigned MinElts = RetTy->getMinNumElements();
5157 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
5161 case Intrinsic::vector_insert: {
5163 Name = Name.substr(5);
5164 if (!Name.starts_with(
"aarch64.sve.tuple")) {
5168 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
5173 NewCall = Builder.CreateCall(
5177 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
5183 assert(
N > 1 &&
"Create is expected to be between 2-4");
5186 unsigned MinElts = RetTy->getMinNumElements() /
N;
5187 for (
unsigned I = 0;
I <
N;
I++) {
5189 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
5196 case Intrinsic::arm_neon_bfdot:
5197 case Intrinsic::arm_neon_bfmmla:
5198 case Intrinsic::arm_neon_bfmlalb:
5199 case Intrinsic::arm_neon_bfmlalt:
5200 case Intrinsic::aarch64_neon_bfdot:
5201 case Intrinsic::aarch64_neon_bfmmla:
5202 case Intrinsic::aarch64_neon_bfmlalb:
5203 case Intrinsic::aarch64_neon_bfmlalt: {
5206 "Mismatch between function args and call args");
5207 size_t OperandWidth =
5209 assert((OperandWidth == 64 || OperandWidth == 128) &&
5210 "Unexpected operand width");
5212 auto Iter = CI->
args().begin();
5213 Args.push_back(*Iter++);
5214 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5215 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5216 NewCall = Builder.CreateCall(NewFn, Args);
5220 case Intrinsic::bitreverse:
5221 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5224 case Intrinsic::ctlz:
5225 case Intrinsic::cttz: {
5232 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
5236 case Intrinsic::objectsize: {
5237 Value *NullIsUnknownSize =
5241 NewCall = Builder.CreateCall(
5246 case Intrinsic::ctpop:
5247 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5249 case Intrinsic::dbg_value: {
5251 Name = Name.substr(5);
5253 if (Name.starts_with(
"dbg.addr")) {
5267 if (
Offset->isNullValue()) {
5268 NewCall = Builder.CreateCall(
5277 case Intrinsic::ptr_annotation:
5285 NewCall = Builder.CreateCall(
5294 case Intrinsic::var_annotation:
5301 NewCall = Builder.CreateCall(
5310 case Intrinsic::riscv_aes32dsi:
5311 case Intrinsic::riscv_aes32dsmi:
5312 case Intrinsic::riscv_aes32esi:
5313 case Intrinsic::riscv_aes32esmi:
5314 case Intrinsic::riscv_sm4ks:
5315 case Intrinsic::riscv_sm4ed: {
5325 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5326 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5332 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5333 Value *Res = NewCall;
5335 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5341 case Intrinsic::nvvm_mapa_shared_cluster: {
5345 Value *Res = NewCall;
5346 Res = Builder.CreateAddrSpaceCast(
5353 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5354 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5357 Args[0] = Builder.CreateAddrSpaceCast(
5360 NewCall = Builder.CreateCall(NewFn, Args);
5366 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5367 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5368 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5369 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5370 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5371 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5372 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5373 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5380 Args[0] = Builder.CreateAddrSpaceCast(
5389 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5391 NewCall = Builder.CreateCall(NewFn, Args);
5397 case Intrinsic::riscv_sha256sig0:
5398 case Intrinsic::riscv_sha256sig1:
5399 case Intrinsic::riscv_sha256sum0:
5400 case Intrinsic::riscv_sha256sum1:
5401 case Intrinsic::riscv_sm3p0:
5402 case Intrinsic::riscv_sm3p1: {
5409 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5411 NewCall = Builder.CreateCall(NewFn, Arg);
5413 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5420 case Intrinsic::x86_xop_vfrcz_ss:
5421 case Intrinsic::x86_xop_vfrcz_sd:
5422 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5425 case Intrinsic::x86_xop_vpermil2pd:
5426 case Intrinsic::x86_xop_vpermil2ps:
5427 case Intrinsic::x86_xop_vpermil2pd_256:
5428 case Intrinsic::x86_xop_vpermil2ps_256: {
5432 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5433 NewCall = Builder.CreateCall(NewFn, Args);
5437 case Intrinsic::x86_sse41_ptestc:
5438 case Intrinsic::x86_sse41_ptestz:
5439 case Intrinsic::x86_sse41_ptestnzc: {
5453 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5454 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5456 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5460 case Intrinsic::x86_rdtscp: {
5466 NewCall = Builder.CreateCall(NewFn);
5468 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5471 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5479 case Intrinsic::x86_sse41_insertps:
5480 case Intrinsic::x86_sse41_dppd:
5481 case Intrinsic::x86_sse41_dpps:
5482 case Intrinsic::x86_sse41_mpsadbw:
5483 case Intrinsic::x86_avx_dp_ps_256:
5484 case Intrinsic::x86_avx2_mpsadbw: {
5490 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5491 NewCall = Builder.CreateCall(NewFn, Args);
5495 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5496 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5497 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5498 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5499 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5500 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5506 NewCall = Builder.CreateCall(NewFn, Args);
5515 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5516 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5517 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5518 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5519 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5520 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5524 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5525 Args[1] = Builder.CreateBitCast(
5528 NewCall = Builder.CreateCall(NewFn, Args);
5529 Value *Res = Builder.CreateBitCast(
5537 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5538 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5539 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5543 Args[1] = Builder.CreateBitCast(
5545 Args[2] = Builder.CreateBitCast(
5548 NewCall = Builder.CreateCall(NewFn, Args);
5552 case Intrinsic::thread_pointer: {
5553 NewCall = Builder.CreateCall(NewFn, {});
5557 case Intrinsic::memcpy:
5558 case Intrinsic::memmove:
5559 case Intrinsic::memset: {
5575 NewCall = Builder.CreateCall(NewFn, Args);
5577 AttributeList NewAttrs = AttributeList::get(
5578 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5579 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5580 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5585 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5588 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5592 case Intrinsic::masked_load:
5593 case Intrinsic::masked_gather:
5594 case Intrinsic::masked_store:
5595 case Intrinsic::masked_scatter: {
5601 auto GetMaybeAlign = [](
Value *
Op) {
5611 auto GetAlign = [&](
Value *
Op) {
5620 case Intrinsic::masked_load:
5621 NewCall = Builder.CreateMaskedLoad(
5625 case Intrinsic::masked_gather:
5626 NewCall = Builder.CreateMaskedGather(
5632 case Intrinsic::masked_store:
5633 NewCall = Builder.CreateMaskedStore(
5637 case Intrinsic::masked_scatter:
5638 NewCall = Builder.CreateMaskedScatter(
5640 DL.getValueOrABITypeAlignment(
5654 case Intrinsic::lifetime_start:
5655 case Intrinsic::lifetime_end: {
5667 NewCall = Builder.CreateLifetimeStart(Ptr);
5669 NewCall = Builder.CreateLifetimeEnd(Ptr);
5678 case Intrinsic::x86_avx512_vpdpbusd_128:
5679 case Intrinsic::x86_avx512_vpdpbusd_256:
5680 case Intrinsic::x86_avx512_vpdpbusd_512:
5681 case Intrinsic::x86_avx512_vpdpbusds_128:
5682 case Intrinsic::x86_avx512_vpdpbusds_256:
5683 case Intrinsic::x86_avx512_vpdpbusds_512:
5684 case Intrinsic::x86_avx2_vpdpbssd_128:
5685 case Intrinsic::x86_avx2_vpdpbssd_256:
5686 case Intrinsic::x86_avx10_vpdpbssd_512:
5687 case Intrinsic::x86_avx2_vpdpbssds_128:
5688 case Intrinsic::x86_avx2_vpdpbssds_256:
5689 case Intrinsic::x86_avx10_vpdpbssds_512:
5690 case Intrinsic::x86_avx2_vpdpbsud_128:
5691 case Intrinsic::x86_avx2_vpdpbsud_256:
5692 case Intrinsic::x86_avx10_vpdpbsud_512:
5693 case Intrinsic::x86_avx2_vpdpbsuds_128:
5694 case Intrinsic::x86_avx2_vpdpbsuds_256:
5695 case Intrinsic::x86_avx10_vpdpbsuds_512:
5696 case Intrinsic::x86_avx2_vpdpbuud_128:
5697 case Intrinsic::x86_avx2_vpdpbuud_256:
5698 case Intrinsic::x86_avx10_vpdpbuud_512:
5699 case Intrinsic::x86_avx2_vpdpbuuds_128:
5700 case Intrinsic::x86_avx2_vpdpbuuds_256:
5701 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5706 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5707 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5709 NewCall = Builder.CreateCall(NewFn, Args);
5712 case Intrinsic::x86_avx512_vpdpwssd_128:
5713 case Intrinsic::x86_avx512_vpdpwssd_256:
5714 case Intrinsic::x86_avx512_vpdpwssd_512:
5715 case Intrinsic::x86_avx512_vpdpwssds_128:
5716 case Intrinsic::x86_avx512_vpdpwssds_256:
5717 case Intrinsic::x86_avx512_vpdpwssds_512:
5718 case Intrinsic::x86_avx2_vpdpwsud_128:
5719 case Intrinsic::x86_avx2_vpdpwsud_256:
5720 case Intrinsic::x86_avx10_vpdpwsud_512:
5721 case Intrinsic::x86_avx2_vpdpwsuds_128:
5722 case Intrinsic::x86_avx2_vpdpwsuds_256:
5723 case Intrinsic::x86_avx10_vpdpwsuds_512:
5724 case Intrinsic::x86_avx2_vpdpwusd_128:
5725 case Intrinsic::x86_avx2_vpdpwusd_256:
5726 case Intrinsic::x86_avx10_vpdpwusd_512:
5727 case Intrinsic::x86_avx2_vpdpwusds_128:
5728 case Intrinsic::x86_avx2_vpdpwusds_256:
5729 case Intrinsic::x86_avx10_vpdpwusds_512:
5730 case Intrinsic::x86_avx2_vpdpwuud_128:
5731 case Intrinsic::x86_avx2_vpdpwuud_256:
5732 case Intrinsic::x86_avx10_vpdpwuud_512:
5733 case Intrinsic::x86_avx2_vpdpwuuds_128:
5734 case Intrinsic::x86_avx2_vpdpwuuds_256:
5735 case Intrinsic::x86_avx10_vpdpwuuds_512:
5740 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5741 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5743 NewCall = Builder.CreateCall(NewFn, Args);
5746 assert(NewCall &&
"Should have either set this variable or returned through "
5747 "the default case");
5754 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5768 F->eraseFromParent();
5774 if (NumOperands == 0)
5782 if (NumOperands == 3) {
5786 Metadata *Elts2[] = {ScalarType, ScalarType,
5800 if (
Opc != Instruction::BitCast)
5804 Type *SrcTy = V->getType();
5821 if (
Opc != Instruction::BitCast)
5824 Type *SrcTy =
C->getType();
5851 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5852 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5853 if (Flag->getNumOperands() < 3)
5855 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5856 return K->getString() ==
"Debug Info Version";
5859 if (OpIt != ModFlags->op_end()) {
5860 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5867 bool BrokenDebugInfo =
false;
5870 if (!BrokenDebugInfo)
5876 M.getContext().diagnose(Diag);
5883 M.getContext().diagnose(DiagVersion);
5893 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5896 if (
F->hasFnAttribute(Attr)) {
5899 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5901 auto [Part, Rest] = S.
split(
',');
5907 const unsigned Dim = DimC -
'x';
5908 assert(Dim < 3 &&
"Unexpected dim char");
5918 F->addFnAttr(Attr, NewAttr);
5922 return S ==
"x" || S ==
"y" || S ==
"z";
5927 if (K ==
"kernel") {
5939 const unsigned Idx = (AlignIdxValuePair >> 16);
5940 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5945 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5950 if (K ==
"minctasm") {
5955 if (K ==
"maxnreg") {
5960 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
5964 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
5968 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
5972 if (K ==
"grid_constant") {
5987 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
5994 if (!SeenNodes.
insert(MD).second)
6001 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
6008 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6010 const MDOperand &V = MD->getOperand(j + 1);
6013 NewOperands.
append({K, V});
6016 if (NewOperands.
size() > 1)
6029 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
6030 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6031 if (ModRetainReleaseMarker) {
6037 ID->getString().split(ValueComp,
"#");
6038 if (ValueComp.
size() == 2) {
6039 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
6043 M.eraseNamedMetadata(ModRetainReleaseMarker);
6054 auto UpgradeToIntrinsic = [&](
const char *OldFunc,
6080 bool InvalidCast =
false;
6082 for (
unsigned I = 0, E = CI->
arg_size();
I != E; ++
I) {
6095 Arg = Builder.CreateBitCast(Arg, NewFuncTy->
getParamType(
I));
6097 Args.push_back(Arg);
6104 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6109 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->
getType());
6122 UpgradeToIntrinsic(
"clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6130 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6131 {
"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6132 {
"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6133 {
"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6134 {
"objc_autoreleaseReturnValue",
6135 llvm::Intrinsic::objc_autoreleaseReturnValue},
6136 {
"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6137 {
"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6138 {
"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6139 {
"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6140 {
"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6141 {
"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6142 {
"objc_release", llvm::Intrinsic::objc_release},
6143 {
"objc_retain", llvm::Intrinsic::objc_retain},
6144 {
"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6145 {
"objc_retainAutoreleaseReturnValue",
6146 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6147 {
"objc_retainAutoreleasedReturnValue",
6148 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6149 {
"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6150 {
"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6151 {
"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6152 {
"objc_unsafeClaimAutoreleasedReturnValue",
6153 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6154 {
"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6155 {
"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6156 {
"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6157 {
"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6158 {
"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6159 {
"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6160 {
"objc_arc_annotation_topdown_bbstart",
6161 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6162 {
"objc_arc_annotation_topdown_bbend",
6163 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6164 {
"objc_arc_annotation_bottomup_bbstart",
6165 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6166 {
"objc_arc_annotation_bottomup_bbend",
6167 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6169 for (
auto &
I : RuntimeFuncs)
6170 UpgradeToIntrinsic(
I.first,
I.second);
6174 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6178 bool HasObjCFlag =
false, HasClassProperties =
false,
Changed =
false;
6179 bool HasSwiftVersionFlag =
false;
6180 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6187 if (
Op->getNumOperands() != 3)
6201 if (
ID->getString() ==
"Objective-C Image Info Version")
6203 if (
ID->getString() ==
"Objective-C Class Properties")
6204 HasClassProperties =
true;
6206 if (
ID->getString() ==
"PIC Level") {
6207 if (
auto *Behavior =
6209 uint64_t V = Behavior->getLimitedValue();
6215 if (
ID->getString() ==
"PIE Level")
6216 if (
auto *Behavior =
6223 if (
ID->getString() ==
"branch-target-enforcement" ||
6224 ID->getString().starts_with(
"sign-return-address")) {
6225 if (
auto *Behavior =
6231 Op->getOperand(1),
Op->getOperand(2)};
6241 if (
ID->getString() ==
"Objective-C Image Info Section") {
6244 Value->getString().split(ValueComp,
" ");
6245 if (ValueComp.
size() != 1) {
6246 std::string NewValue;
6247 for (
auto &S : ValueComp)
6248 NewValue += S.str();
6259 if (
ID->getString() ==
"Objective-C Garbage Collection") {
6262 assert(Md->getValue() &&
"Expected non-empty metadata");
6263 auto Type = Md->getValue()->getType();
6266 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6267 if ((Val & 0xff) != Val) {
6268 HasSwiftVersionFlag =
true;
6269 SwiftABIVersion = (Val & 0xff00) >> 8;
6270 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6271 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6282 if (
ID->getString() ==
"amdgpu_code_object_version") {
6285 MDString::get(M.getContext(),
"amdhsa_code_object_version"),
6297 if (HasObjCFlag && !HasClassProperties) {
6303 if (HasSwiftVersionFlag) {
6307 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6309 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6317 auto TrimSpaces = [](
StringRef Section) -> std::string {
6319 Section.split(Components,
',');
6324 for (
auto Component : Components)
6325 OS <<
',' << Component.trim();
6330 for (
auto &GV : M.globals()) {
6331 if (!GV.hasSection())
6336 if (!Section.starts_with(
"__DATA, __objc_catlist"))
6341 GV.setSection(TrimSpaces(Section));
6357struct StrictFPUpgradeVisitor :
public InstVisitor<StrictFPUpgradeVisitor> {
6358 StrictFPUpgradeVisitor() =
default;
6361 if (!
Call.isStrictFP())
6367 Call.removeFnAttr(Attribute::StrictFP);
6368 Call.addFnAttr(Attribute::NoBuiltin);
6373struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6374 :
public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6375 AMDGPUUnsafeFPAtomicsUpgradeVisitor() =
default;
6377 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6392 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
6393 StrictFPUpgradeVisitor SFPV;
6398 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6399 F.getReturnType(),
F.getAttributes().getRetAttrs()));
6400 for (
auto &Arg :
F.args())
6402 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6404 bool AddingAttrs =
false, RemovingAttrs =
false;
6405 AttrBuilder AttrsToAdd(
F.getContext());
6410 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
6411 A.isValid() &&
A.isStringAttribute()) {
6412 F.setSection(
A.getValueAsString());
6414 RemovingAttrs =
true;
6418 A.isValid() &&
A.isStringAttribute()) {
6421 AddingAttrs = RemovingAttrs =
true;
6424 if (
Attribute A =
F.getFnAttribute(
"uniform-work-group-size");
6425 A.isValid() &&
A.isStringAttribute() && !
A.getValueAsString().empty()) {
6427 RemovingAttrs =
true;
6428 if (
A.getValueAsString() ==
"true") {
6429 AttrsToAdd.addAttribute(
"uniform-work-group-size");
6438 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
6441 if (
A.getValueAsBool()) {
6442 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6448 AttrsToRemove.
addAttribute(
"amdgpu-unsafe-fp-atomics");
6449 RemovingAttrs =
true;
6456 bool HandleDenormalMode =
false;
6458 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math"); Attr.isValid()) {
6461 DenormalFPMath = ParsedMode;
6463 AddingAttrs = RemovingAttrs =
true;
6464 HandleDenormalMode =
true;
6468 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math-f32");
6472 DenormalFPMathF32 = ParsedMode;
6474 AddingAttrs = RemovingAttrs =
true;
6475 HandleDenormalMode =
true;
6479 if (HandleDenormalMode)
6480 AttrsToAdd.addDenormalFPEnvAttr(
6484 F.removeFnAttrs(AttrsToRemove);
6487 F.addFnAttrs(AttrsToAdd);
6493 if (!
F.hasFnAttribute(FnAttrName))
6494 F.addFnAttr(FnAttrName,
Value);
6501 if (!
F.hasFnAttribute(FnAttrName)) {
6503 F.addFnAttr(FnAttrName);
6505 auto A =
F.getFnAttribute(FnAttrName);
6506 if (
"false" ==
A.getValueAsString())
6507 F.removeFnAttr(FnAttrName);
6508 else if (
"true" ==
A.getValueAsString()) {
6509 F.removeFnAttr(FnAttrName);
6510 F.addFnAttr(FnAttrName);
6516 Triple T(M.getTargetTriple());
6517 if (!
T.isThumb() && !
T.isARM() && !
T.isAArch64())
6527 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6531 if (
Op->getNumOperands() != 3)
6540 uint64_t *ValPtr = IDStr ==
"branch-target-enforcement" ? &BTEValue
6541 : IDStr ==
"branch-protection-pauth-lr" ? &BPPLRValue
6542 : IDStr ==
"guarded-control-stack" ? &GCSValue
6543 : IDStr ==
"sign-return-address" ? &SRAValue
6544 : IDStr ==
"sign-return-address-all" ? &SRAALLValue
6545 : IDStr ==
"sign-return-address-with-bkey"
6551 *ValPtr = CI->getZExtValue();
6557 bool BTE = BTEValue == 1;
6558 bool BPPLR = BPPLRValue == 1;
6559 bool GCS = GCSValue == 1;
6560 bool SRA = SRAValue == 1;
6563 if (SRA && SRAALLValue == 1)
6564 SignTypeValue =
"all";
6567 if (SRA && SRABKeyValue == 1)
6568 SignKeyValue =
"b_key";
6570 for (
Function &
F : M.getFunctionList()) {
6571 if (
F.isDeclaration())
6578 if (
auto A =
F.getFnAttribute(
"sign-return-address");
6579 A.isValid() &&
"none" ==
A.getValueAsString()) {
6580 F.removeFnAttr(
"sign-return-address");
6581 F.removeFnAttr(
"sign-return-address-key");
6597 if (SRAALLValue == 1)
6599 if (SRABKeyValue == 1)
6608 if (
T->getNumOperands() < 1)
6613 return S->getString().starts_with(
"llvm.vectorizer.");
6617 StringRef OldPrefix =
"llvm.vectorizer.";
6620 if (OldTag ==
"llvm.vectorizer.unroll")
6632 if (
T->getNumOperands() < 1)
6637 if (!OldTag->getString().starts_with(
"llvm.vectorizer."))
6642 Ops.reserve(
T->getNumOperands());
6644 for (
unsigned I = 1,
E =
T->getNumOperands();
I !=
E; ++
I)
6645 Ops.push_back(
T->getOperand(
I));
6659 Ops.reserve(
T->getNumOperands());
6670 if ((
T.isSPIR() || (
T.isSPIRV() && !
T.isSPIRVLogical())) &&
6671 !
DL.contains(
"-G") && !
DL.starts_with(
"G")) {
6672 return DL.empty() ? std::string(
"G1") : (
DL +
"-G1").str();
6675 if (
T.isLoongArch64() ||
T.isRISCV64()) {
6677 auto I =
DL.find(
"-n64-");
6679 return (
DL.take_front(
I) +
"-n32:64-" +
DL.drop_front(
I + 5)).str();
6684 std::string Res =
DL.str();
6687 if (!
DL.contains(
"-G") && !
DL.starts_with(
"G"))
6688 Res.append(Res.empty() ?
"G1" :
"-G1");
6696 if (!
DL.contains(
"-ni") && !
DL.starts_with(
"ni"))
6697 Res.append(
"-ni:7:8:9");
6699 if (
DL.ends_with(
"ni:7"))
6701 if (
DL.ends_with(
"ni:7:8"))
6706 if (!
DL.contains(
"-p7") && !
DL.starts_with(
"p7"))
6707 Res.append(
"-p7:160:256:256:32");
6708 if (!
DL.contains(
"-p8") && !
DL.starts_with(
"p8"))
6709 Res.append(
"-p8:128:128:128:48");
6710 constexpr StringRef OldP8(
"-p8:128:128-");
6711 if (
DL.contains(OldP8))
6712 Res.replace(Res.find(OldP8), OldP8.
size(),
"-p8:128:128:128:48-");
6713 if (!
DL.contains(
"-p9") && !
DL.starts_with(
"p9"))
6714 Res.append(
"-p9:192:256:256:32");
6718 if (!
DL.contains(
"m:e"))
6719 Res = Res.empty() ?
"m:e" :
"m:e-" + Res;
6724 if (
T.isSystemZ() && !
DL.empty()) {
6726 if (!
DL.contains(
"-S64"))
6727 return "E-S64" +
DL.drop_front(1).str();
6731 auto AddPtr32Ptr64AddrSpaces = [&
DL, &Res]() {
6734 StringRef AddrSpaces{
"-p270:32:32-p271:32:32-p272:64:64"};
6735 if (!
DL.contains(AddrSpaces)) {
6737 Regex R(
"^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6738 if (R.match(Res, &
Groups))
6744 if (
T.isAArch64()) {
6746 if (!
DL.empty() && !
DL.contains(
"-Fn32"))
6747 Res.append(
"-Fn32");
6748 AddPtr32Ptr64AddrSpaces();
6752 if (
T.isSPARC() || (
T.isMIPS64() && !
DL.contains(
"m:m")) ||
T.isPPC64() ||
6756 std::string I64 =
"-i64:64";
6757 std::string I128 =
"-i128:128";
6759 size_t Pos = Res.find(I64);
6760 if (Pos !=
size_t(-1))
6761 Res.insert(Pos + I64.size(), I128);
6765 if (
T.isPPC() &&
T.isOSAIX() && !
DL.contains(
"f64:32:64") && !
DL.empty()) {
6766 size_t Pos = Res.find(
"-S128");
6769 Res.insert(Pos,
"-f64:32:64");
6775 AddPtr32Ptr64AddrSpaces();
6783 if (!
T.isOSIAMCU()) {
6784 std::string I128 =
"-i128:128";
6787 Regex R(
"^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6788 if (R.match(Res, &
Groups))
6796 if (
T.isWindowsMSVCEnvironment() && !
T.isArch64Bit()) {
6798 auto I =
Ref.find(
"-f80:32-");
6800 Res = (
Ref.take_front(
I) +
"-f80:128-" +
Ref.drop_front(
I + 8)).str();
6808 Attribute A =
B.getAttribute(
"no-frame-pointer-elim");
6811 FramePointer =
A.getValueAsString() ==
"true" ?
"all" :
"none";
6812 B.removeAttribute(
"no-frame-pointer-elim");
6814 if (
B.contains(
"no-frame-pointer-elim-non-leaf")) {
6816 if (FramePointer !=
"all")
6817 FramePointer =
"non-leaf";
6818 B.removeAttribute(
"no-frame-pointer-elim-non-leaf");
6820 if (!FramePointer.
empty())
6821 B.addAttribute(
"frame-pointer", FramePointer);
6823 A =
B.getAttribute(
"null-pointer-is-valid");
6826 bool NullPointerIsValid =
A.getValueAsString() ==
"true";
6827 B.removeAttribute(
"null-pointer-is-valid");
6828 if (NullPointerIsValid)
6829 B.addAttribute(Attribute::NullPointerIsValid);
6832 A =
B.getAttribute(
"uniform-work-group-size");
6836 bool IsTrue = Val ==
"true";
6837 B.removeAttribute(
"uniform-work-group-size");
6839 B.addAttribute(
"uniform-work-group-size");
6850 return OBD.
getTag() ==
"clang.arc.attachedcall" &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are string literals.
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuilder.
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature.
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called, or Intrinsic::not_intrinsic if the called function is not an intrinsic or is an indirect call.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for its bit width.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intrinsic if the function is not an intrinsic.
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a parent module.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
LLVM_ABI bool hasStructReturnType(ID id)
Returns true if id has a struct return type.
@ ADDRESS_SPACE_SHARED_CLUSTER
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ Default
The result value is uniform if and only if all operands are uniform.
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.