34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
63 cl::desc(
"Disable autoupgrade of debug info"));
82 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
97 Type *LastArgType =
F->getFunctionType()->getParamType(
98 F->getFunctionType()->getNumParams() - 1);
113 if (
F->getReturnType()->isVectorTy())
126 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
127 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
144 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
145 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
159 if (
F->getReturnType()->getScalarType()->isBFloatTy())
169 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
183 if (Name.consume_front(
"avx."))
184 return (Name.starts_with(
"blend.p") ||
185 Name ==
"cvt.ps2.pd.256" ||
186 Name ==
"cvtdq2.pd.256" ||
187 Name ==
"cvtdq2.ps.256" ||
188 Name.starts_with(
"movnt.") ||
189 Name.starts_with(
"sqrt.p") ||
190 Name.starts_with(
"storeu.") ||
191 Name.starts_with(
"vbroadcast.s") ||
192 Name.starts_with(
"vbroadcastf128") ||
193 Name.starts_with(
"vextractf128.") ||
194 Name.starts_with(
"vinsertf128.") ||
195 Name.starts_with(
"vperm2f128.") ||
196 Name.starts_with(
"vpermil."));
198 if (Name.consume_front(
"avx2."))
199 return (Name ==
"movntdqa" ||
200 Name.starts_with(
"pabs.") ||
201 Name.starts_with(
"padds.") ||
202 Name.starts_with(
"paddus.") ||
203 Name.starts_with(
"pblendd.") ||
205 Name.starts_with(
"pbroadcast") ||
206 Name.starts_with(
"pcmpeq.") ||
207 Name.starts_with(
"pcmpgt.") ||
208 Name.starts_with(
"pmax") ||
209 Name.starts_with(
"pmin") ||
210 Name.starts_with(
"pmovsx") ||
211 Name.starts_with(
"pmovzx") ||
213 Name ==
"pmulu.dq" ||
214 Name.starts_with(
"psll.dq") ||
215 Name.starts_with(
"psrl.dq") ||
216 Name.starts_with(
"psubs.") ||
217 Name.starts_with(
"psubus.") ||
218 Name.starts_with(
"vbroadcast") ||
219 Name ==
"vbroadcasti128" ||
220 Name ==
"vextracti128" ||
221 Name ==
"vinserti128" ||
222 Name ==
"vperm2i128");
224 if (Name.consume_front(
"avx512.")) {
225 if (Name.consume_front(
"mask."))
227 return (Name.starts_with(
"add.p") ||
228 Name.starts_with(
"and.") ||
229 Name.starts_with(
"andn.") ||
230 Name.starts_with(
"broadcast.s") ||
231 Name.starts_with(
"broadcastf32x4.") ||
232 Name.starts_with(
"broadcastf32x8.") ||
233 Name.starts_with(
"broadcastf64x2.") ||
234 Name.starts_with(
"broadcastf64x4.") ||
235 Name.starts_with(
"broadcasti32x4.") ||
236 Name.starts_with(
"broadcasti32x8.") ||
237 Name.starts_with(
"broadcasti64x2.") ||
238 Name.starts_with(
"broadcasti64x4.") ||
239 Name.starts_with(
"cmp.b") ||
240 Name.starts_with(
"cmp.d") ||
241 Name.starts_with(
"cmp.q") ||
242 Name.starts_with(
"cmp.w") ||
243 Name.starts_with(
"compress.b") ||
244 Name.starts_with(
"compress.d") ||
245 Name.starts_with(
"compress.p") ||
246 Name.starts_with(
"compress.q") ||
247 Name.starts_with(
"compress.store.") ||
248 Name.starts_with(
"compress.w") ||
249 Name.starts_with(
"conflict.") ||
250 Name.starts_with(
"cvtdq2pd.") ||
251 Name.starts_with(
"cvtdq2ps.") ||
252 Name ==
"cvtpd2dq.256" ||
253 Name ==
"cvtpd2ps.256" ||
254 Name ==
"cvtps2pd.128" ||
255 Name ==
"cvtps2pd.256" ||
256 Name.starts_with(
"cvtqq2pd.") ||
257 Name ==
"cvtqq2ps.256" ||
258 Name ==
"cvtqq2ps.512" ||
259 Name ==
"cvttpd2dq.256" ||
260 Name ==
"cvttps2dq.128" ||
261 Name ==
"cvttps2dq.256" ||
262 Name.starts_with(
"cvtudq2pd.") ||
263 Name.starts_with(
"cvtudq2ps.") ||
264 Name.starts_with(
"cvtuqq2pd.") ||
265 Name ==
"cvtuqq2ps.256" ||
266 Name ==
"cvtuqq2ps.512" ||
267 Name.starts_with(
"dbpsadbw.") ||
268 Name.starts_with(
"div.p") ||
269 Name.starts_with(
"expand.b") ||
270 Name.starts_with(
"expand.d") ||
271 Name.starts_with(
"expand.load.") ||
272 Name.starts_with(
"expand.p") ||
273 Name.starts_with(
"expand.q") ||
274 Name.starts_with(
"expand.w") ||
275 Name.starts_with(
"fpclass.p") ||
276 Name.starts_with(
"insert") ||
277 Name.starts_with(
"load.") ||
278 Name.starts_with(
"loadu.") ||
279 Name.starts_with(
"lzcnt.") ||
280 Name.starts_with(
"max.p") ||
281 Name.starts_with(
"min.p") ||
282 Name.starts_with(
"movddup") ||
283 Name.starts_with(
"move.s") ||
284 Name.starts_with(
"movshdup") ||
285 Name.starts_with(
"movsldup") ||
286 Name.starts_with(
"mul.p") ||
287 Name.starts_with(
"or.") ||
288 Name.starts_with(
"pabs.") ||
289 Name.starts_with(
"packssdw.") ||
290 Name.starts_with(
"packsswb.") ||
291 Name.starts_with(
"packusdw.") ||
292 Name.starts_with(
"packuswb.") ||
293 Name.starts_with(
"padd.") ||
294 Name.starts_with(
"padds.") ||
295 Name.starts_with(
"paddus.") ||
296 Name.starts_with(
"palignr.") ||
297 Name.starts_with(
"pand.") ||
298 Name.starts_with(
"pandn.") ||
299 Name.starts_with(
"pavg") ||
300 Name.starts_with(
"pbroadcast") ||
301 Name.starts_with(
"pcmpeq.") ||
302 Name.starts_with(
"pcmpgt.") ||
303 Name.starts_with(
"perm.df.") ||
304 Name.starts_with(
"perm.di.") ||
305 Name.starts_with(
"permvar.") ||
306 Name.starts_with(
"pmaddubs.w.") ||
307 Name.starts_with(
"pmaddw.d.") ||
308 Name.starts_with(
"pmax") ||
309 Name.starts_with(
"pmin") ||
310 Name ==
"pmov.qd.256" ||
311 Name ==
"pmov.qd.512" ||
312 Name ==
"pmov.wb.256" ||
313 Name ==
"pmov.wb.512" ||
314 Name.starts_with(
"pmovsx") ||
315 Name.starts_with(
"pmovzx") ||
316 Name.starts_with(
"pmul.dq.") ||
317 Name.starts_with(
"pmul.hr.sw.") ||
318 Name.starts_with(
"pmulh.w.") ||
319 Name.starts_with(
"pmulhu.w.") ||
320 Name.starts_with(
"pmull.") ||
321 Name.starts_with(
"pmultishift.qb.") ||
322 Name.starts_with(
"pmulu.dq.") ||
323 Name.starts_with(
"por.") ||
324 Name.starts_with(
"prol.") ||
325 Name.starts_with(
"prolv.") ||
326 Name.starts_with(
"pror.") ||
327 Name.starts_with(
"prorv.") ||
328 Name.starts_with(
"pshuf.b.") ||
329 Name.starts_with(
"pshuf.d.") ||
330 Name.starts_with(
"pshufh.w.") ||
331 Name.starts_with(
"pshufl.w.") ||
332 Name.starts_with(
"psll.d") ||
333 Name.starts_with(
"psll.q") ||
334 Name.starts_with(
"psll.w") ||
335 Name.starts_with(
"pslli") ||
336 Name.starts_with(
"psllv") ||
337 Name.starts_with(
"psra.d") ||
338 Name.starts_with(
"psra.q") ||
339 Name.starts_with(
"psra.w") ||
340 Name.starts_with(
"psrai") ||
341 Name.starts_with(
"psrav") ||
342 Name.starts_with(
"psrl.d") ||
343 Name.starts_with(
"psrl.q") ||
344 Name.starts_with(
"psrl.w") ||
345 Name.starts_with(
"psrli") ||
346 Name.starts_with(
"psrlv") ||
347 Name.starts_with(
"psub.") ||
348 Name.starts_with(
"psubs.") ||
349 Name.starts_with(
"psubus.") ||
350 Name.starts_with(
"pternlog.") ||
351 Name.starts_with(
"punpckh") ||
352 Name.starts_with(
"punpckl") ||
353 Name.starts_with(
"pxor.") ||
354 Name.starts_with(
"shuf.f") ||
355 Name.starts_with(
"shuf.i") ||
356 Name.starts_with(
"shuf.p") ||
357 Name.starts_with(
"sqrt.p") ||
358 Name.starts_with(
"store.b.") ||
359 Name.starts_with(
"store.d.") ||
360 Name.starts_with(
"store.p") ||
361 Name.starts_with(
"store.q.") ||
362 Name.starts_with(
"store.w.") ||
363 Name ==
"store.ss" ||
364 Name.starts_with(
"storeu.") ||
365 Name.starts_with(
"sub.p") ||
366 Name.starts_with(
"ucmp.") ||
367 Name.starts_with(
"unpckh.") ||
368 Name.starts_with(
"unpckl.") ||
369 Name.starts_with(
"valign.") ||
370 Name ==
"vcvtph2ps.128" ||
371 Name ==
"vcvtph2ps.256" ||
372 Name.starts_with(
"vextract") ||
373 Name.starts_with(
"vfmadd.") ||
374 Name.starts_with(
"vfmaddsub.") ||
375 Name.starts_with(
"vfnmadd.") ||
376 Name.starts_with(
"vfnmsub.") ||
377 Name.starts_with(
"vpdpbusd.") ||
378 Name.starts_with(
"vpdpbusds.") ||
379 Name.starts_with(
"vpdpwssd.") ||
380 Name.starts_with(
"vpdpwssds.") ||
381 Name.starts_with(
"vpermi2var.") ||
382 Name.starts_with(
"vpermil.p") ||
383 Name.starts_with(
"vpermilvar.") ||
384 Name.starts_with(
"vpermt2var.") ||
385 Name.starts_with(
"vpmadd52") ||
386 Name.starts_with(
"vpshld.") ||
387 Name.starts_with(
"vpshldv.") ||
388 Name.starts_with(
"vpshrd.") ||
389 Name.starts_with(
"vpshrdv.") ||
390 Name.starts_with(
"vpshufbitqmb.") ||
391 Name.starts_with(
"xor."));
393 if (Name.consume_front(
"mask3."))
395 return (Name.starts_with(
"vfmadd.") ||
396 Name.starts_with(
"vfmaddsub.") ||
397 Name.starts_with(
"vfmsub.") ||
398 Name.starts_with(
"vfmsubadd.") ||
399 Name.starts_with(
"vfnmsub."));
401 if (Name.consume_front(
"maskz."))
403 return (Name.starts_with(
"pternlog.") ||
404 Name.starts_with(
"vfmadd.") ||
405 Name.starts_with(
"vfmaddsub.") ||
406 Name.starts_with(
"vpdpbusd.") ||
407 Name.starts_with(
"vpdpbusds.") ||
408 Name.starts_with(
"vpdpwssd.") ||
409 Name.starts_with(
"vpdpwssds.") ||
410 Name.starts_with(
"vpermt2var.") ||
411 Name.starts_with(
"vpmadd52") ||
412 Name.starts_with(
"vpshldv.") ||
413 Name.starts_with(
"vpshrdv."));
416 return (Name ==
"movntdqa" ||
417 Name ==
"pmul.dq.512" ||
418 Name ==
"pmulu.dq.512" ||
419 Name.starts_with(
"broadcastm") ||
420 Name.starts_with(
"cmp.p") ||
421 Name.starts_with(
"cvtb2mask.") ||
422 Name.starts_with(
"cvtd2mask.") ||
423 Name.starts_with(
"cvtmask2") ||
424 Name.starts_with(
"cvtq2mask.") ||
425 Name ==
"cvtusi2sd" ||
426 Name.starts_with(
"cvtw2mask.") ||
431 Name ==
"kortestc.w" ||
432 Name ==
"kortestz.w" ||
433 Name.starts_with(
"kunpck") ||
436 Name.starts_with(
"padds.") ||
437 Name.starts_with(
"pbroadcast") ||
438 Name.starts_with(
"prol") ||
439 Name.starts_with(
"pror") ||
440 Name.starts_with(
"psll.dq") ||
441 Name.starts_with(
"psrl.dq") ||
442 Name.starts_with(
"psubs.") ||
443 Name.starts_with(
"ptestm") ||
444 Name.starts_with(
"ptestnm") ||
445 Name.starts_with(
"storent.") ||
446 Name.starts_with(
"vbroadcast.s") ||
447 Name.starts_with(
"vpshld.") ||
448 Name.starts_with(
"vpshrd."));
451 if (Name.consume_front(
"fma."))
452 return (Name.starts_with(
"vfmadd.") ||
453 Name.starts_with(
"vfmsub.") ||
454 Name.starts_with(
"vfmsubadd.") ||
455 Name.starts_with(
"vfnmadd.") ||
456 Name.starts_with(
"vfnmsub."));
458 if (Name.consume_front(
"fma4."))
459 return Name.starts_with(
"vfmadd.s");
461 if (Name.consume_front(
"sse."))
462 return (Name ==
"add.ss" ||
463 Name ==
"cvtsi2ss" ||
464 Name ==
"cvtsi642ss" ||
467 Name.starts_with(
"sqrt.p") ||
469 Name.starts_with(
"storeu.") ||
472 if (Name.consume_front(
"sse2."))
473 return (Name ==
"add.sd" ||
474 Name ==
"cvtdq2pd" ||
475 Name ==
"cvtdq2ps" ||
476 Name ==
"cvtps2pd" ||
477 Name ==
"cvtsi2sd" ||
478 Name ==
"cvtsi642sd" ||
479 Name ==
"cvtss2sd" ||
482 Name.starts_with(
"padds.") ||
483 Name.starts_with(
"paddus.") ||
484 Name.starts_with(
"pcmpeq.") ||
485 Name.starts_with(
"pcmpgt.") ||
490 Name ==
"pmulu.dq" ||
491 Name.starts_with(
"pshuf") ||
492 Name.starts_with(
"psll.dq") ||
493 Name.starts_with(
"psrl.dq") ||
494 Name.starts_with(
"psubs.") ||
495 Name.starts_with(
"psubus.") ||
496 Name.starts_with(
"sqrt.p") ||
498 Name ==
"storel.dq" ||
499 Name.starts_with(
"storeu.") ||
502 if (Name.consume_front(
"sse41."))
503 return (Name.starts_with(
"blendp") ||
504 Name ==
"movntdqa" ||
514 Name.starts_with(
"pmovsx") ||
515 Name.starts_with(
"pmovzx") ||
518 if (Name.consume_front(
"sse42."))
519 return Name ==
"crc32.64.8";
521 if (Name.consume_front(
"sse4a."))
522 return Name.starts_with(
"movnt.");
524 if (Name.consume_front(
"ssse3."))
525 return (Name ==
"pabs.b.128" ||
526 Name ==
"pabs.d.128" ||
527 Name ==
"pabs.w.128");
529 if (Name.consume_front(
"xop."))
530 return (Name ==
"vpcmov" ||
531 Name ==
"vpcmov.256" ||
532 Name.starts_with(
"vpcom") ||
533 Name.starts_with(
"vprot"));
535 return (Name ==
"addcarry.u32" ||
536 Name ==
"addcarry.u64" ||
537 Name ==
"addcarryx.u32" ||
538 Name ==
"addcarryx.u64" ||
539 Name ==
"subborrow.u32" ||
540 Name ==
"subborrow.u64" ||
541 Name.starts_with(
"vcvtph2ps."));
547 if (!Name.consume_front(
"x86."))
555 if (Name ==
"rdtscp") {
557 if (
F->getFunctionType()->getNumParams() == 0)
562 Intrinsic::x86_rdtscp);
569 if (Name.consume_front(
"sse41.ptest")) {
571 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
572 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
573 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
586 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
587 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
588 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
589 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
590 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
591 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
596 if (Name.consume_front(
"avx512.")) {
597 if (Name.consume_front(
"mask.cmp.")) {
600 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
609 }
else if (Name.starts_with(
"vpdpbusd.") ||
610 Name.starts_with(
"vpdpbusds.")) {
613 .
Case(
"vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
614 .
Case(
"vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
615 .
Case(
"vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
616 .
Case(
"vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
617 .
Case(
"vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
618 .
Case(
"vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
622 }
else if (Name.starts_with(
"vpdpwssd.") ||
623 Name.starts_with(
"vpdpwssds.")) {
626 .
Case(
"vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
627 .
Case(
"vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
628 .
Case(
"vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
629 .
Case(
"vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
630 .
Case(
"vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
631 .
Case(
"vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
639 if (Name.consume_front(
"avx2.")) {
640 if (Name.consume_front(
"vpdpb")) {
643 .
Case(
"ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
644 .
Case(
"ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
645 .
Case(
"ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
646 .
Case(
"ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
647 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
648 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
649 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
650 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
651 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
652 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
653 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
654 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
658 }
else if (Name.consume_front(
"vpdpw")) {
661 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
662 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
663 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
664 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
665 .
Case(
"usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
666 .
Case(
"usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
667 .
Case(
"usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
668 .
Case(
"usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
669 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
670 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
671 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
672 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
680 if (Name.consume_front(
"avx10.")) {
681 if (Name.consume_front(
"vpdpb")) {
684 .
Case(
"ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
685 .
Case(
"ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
686 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
687 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
688 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
689 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
693 }
else if (Name.consume_front(
"vpdpw")) {
695 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
696 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
697 .
Case(
"usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
698 .
Case(
"usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
699 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
700 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
708 if (Name.consume_front(
"avx512bf16.")) {
711 .
Case(
"cvtne2ps2bf16.128",
712 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .
Case(
"cvtne2ps2bf16.256",
714 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .
Case(
"cvtne2ps2bf16.512",
716 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .
Case(
"mask.cvtneps2bf16.128",
718 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .
Case(
"cvtneps2bf16.256",
720 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .
Case(
"cvtneps2bf16.512",
722 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
729 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
738 if (Name.consume_front(
"xop.")) {
740 if (Name.starts_with(
"vpermil2")) {
743 auto Idx =
F->getFunctionType()->getParamType(2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
756 }
else if (
F->arg_size() == 2)
759 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
760 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
771 if (Name ==
"seh.recoverfp") {
773 Intrinsic::eh_recoverfp);
785 if (Name.starts_with(
"rbit")) {
788 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
792 if (Name ==
"thread.pointer") {
795 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
799 bool Neon = Name.consume_front(
"neon.");
804 if (Name.consume_front(
"bfdot.")) {
808 .
Cases({
"v2f32.v8i8",
"v4f32.v16i8"},
813 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
814 assert((OperandWidth == 64 || OperandWidth == 128) &&
815 "Unexpected operand width");
817 std::array<Type *, 2> Tys{
828 if (Name.consume_front(
"bfm")) {
830 if (Name.consume_back(
".v4f32.v16i8")) {
876 F->arg_begin()->getType());
880 if (Name.consume_front(
"vst")) {
882 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
886 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
887 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
890 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
891 Intrinsic::arm_neon_vst4lane};
893 auto fArgs =
F->getFunctionType()->params();
894 Type *Tys[] = {fArgs[0], fArgs[1]};
897 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
900 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
909 if (Name.consume_front(
"mve.")) {
911 if (Name ==
"vctp64") {
921 if (Name.starts_with(
"vrintn.v")) {
923 F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
928 if (Name.consume_back(
".v4i1")) {
930 if (Name.consume_back(
".predicated.v2i64.v4i32"))
932 return Name ==
"mull.int" || Name ==
"vqdmull";
934 if (Name.consume_back(
".v2i64")) {
936 bool IsGather = Name.consume_front(
"vldr.gather.");
937 if (IsGather || Name.consume_front(
"vstr.scatter.")) {
938 if (Name.consume_front(
"base.")) {
940 Name.consume_front(
"wb.");
943 return Name ==
"predicated.v2i64";
946 if (Name.consume_front(
"offset.predicated."))
947 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
948 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
961 if (Name.consume_front(
"cde.vcx")) {
963 if (Name.consume_back(
".predicated.v2i64.v4i1"))
965 return Name ==
"1q" || Name ==
"1qa" || Name ==
"2q" || Name ==
"2qa" ||
966 Name ==
"3q" || Name ==
"3qa";
980 F->arg_begin()->getType());
984 if (Name.starts_with(
"addp")) {
986 if (
F->arg_size() != 2)
989 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
991 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
997 if (Name.starts_with(
"bfcvt")) {
1004 if (Name.consume_front(
"sve.")) {
1006 if (Name.consume_front(
"bf")) {
1007 if (Name ==
"mmla") {
1008 Type *Tys[] = {
F->getReturnType(),
1009 std::next(
F->arg_begin())->getType()};
1011 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1014 if (Name.consume_back(
".lane")) {
1018 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1019 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1020 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1032 if (Name ==
"fcvt.bf16f32" || Name ==
"fcvtnt.bf16f32") {
1037 if (Name.consume_front(
"addqv")) {
1039 if (!
F->getReturnType()->isFPOrFPVectorTy())
1042 auto Args =
F->getFunctionType()->params();
1043 Type *Tys[] = {
F->getReturnType(), Args[1]};
1045 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1049 if (Name.consume_front(
"ld")) {
1051 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
1052 if (LdRegex.
match(Name)) {
1058 "Expected 2 arguments for ld* intrinsic.");
1059 Type *PtrTy =
F->getArg(1)->getType();
1062 Intrinsic::aarch64_sve_ld2_sret,
1063 Intrinsic::aarch64_sve_ld3_sret,
1064 Intrinsic::aarch64_sve_ld4_sret,
1067 F->getParent(), LoadIDs[Name[0] -
'2'], {Ty, PtrTy});
1073 if (Name.consume_front(
"tuple.")) {
1075 if (Name.starts_with(
"get")) {
1077 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
1079 F->getParent(), Intrinsic::vector_extract, Tys);
1083 if (Name.starts_with(
"set")) {
1085 auto Args =
F->getFunctionType()->params();
1086 Type *Tys[] = {Args[0], Args[2], Args[1]};
1088 F->getParent(), Intrinsic::vector_insert, Tys);
1092 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
1093 if (CreateTupleRegex.
match(Name)) {
1095 auto Args =
F->getFunctionType()->params();
1096 Type *Tys[] = {
F->getReturnType(), Args[1]};
1098 F->getParent(), Intrinsic::vector_insert, Tys);
1104 if (Name.starts_with(
"rev.nxv")) {
1107 F->getParent(), Intrinsic::vector_reverse,
F->getReturnType());
1119 if (Name.consume_front(
"cp.async.bulk.tensor.g2s.")) {
1123 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1125 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1127 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1128 .
Case(
"tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1129 .
Case(
"tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1130 .
Case(
"tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1131 .
Case(
"tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1132 .
Case(
"tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1141 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1155 size_t FlagStartIndex =
F->getFunctionType()->getNumParams() - 3;
1156 Type *ArgType =
F->getFunctionType()->getParamType(FlagStartIndex);
1166 if (Name.consume_front(
"mapa.shared.cluster"))
1167 if (
F->getReturnType()->getPointerAddressSpace() ==
1169 return Intrinsic::nvvm_mapa_shared_cluster;
1171 if (Name.consume_front(
"cp.async.bulk.")) {
1174 .
Case(
"global.to.shared.cluster",
1175 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1176 .
Case(
"shared.cta.to.cluster",
1177 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1181 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1190 if (Name.consume_front(
"fma.rn."))
1192 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
1193 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1194 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1195 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1198 if (Name.consume_front(
"fmax."))
1200 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
1201 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1202 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1203 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1204 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1205 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1206 .
Case(
"ftz.nan.xorsign.abs.bf16",
1207 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1208 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1209 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1210 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1211 .
Case(
"ftz.xorsign.abs.bf16x2",
1212 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1213 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1214 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1215 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1216 .
Case(
"nan.xorsign.abs.bf16x2",
1217 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1218 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1219 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1222 if (Name.consume_front(
"fmin."))
1224 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
1225 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1226 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1227 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1228 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1229 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1230 .
Case(
"ftz.nan.xorsign.abs.bf16",
1231 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1232 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1233 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1234 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1235 .
Case(
"ftz.xorsign.abs.bf16x2",
1236 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1237 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1238 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1239 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1240 .
Case(
"nan.xorsign.abs.bf16x2",
1241 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1242 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1243 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1246 if (Name.consume_front(
"neg."))
1248 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1249 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1256 return Name.consume_front(
"local") || Name.consume_front(
"shared") ||
1257 Name.consume_front(
"global") || Name.consume_front(
"constant") ||
1258 Name.consume_front(
"param");
1264 if (Name.starts_with(
"to.fp16")) {
1268 FuncTy->getReturnType());
1271 if (Name.starts_with(
"from.fp16")) {
1275 FuncTy->getReturnType());
1282 bool CanUpgradeDebugIntrinsicsToRecords) {
1283 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1288 if (!Name.consume_front(
"llvm.") || Name.empty())
1294 bool IsArm = Name.consume_front(
"arm.");
1295 if (IsArm || Name.consume_front(
"aarch64.")) {
1301 if (Name.consume_front(
"amdgcn.")) {
1302 if (Name ==
"alignbit") {
1305 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1309 if (Name.consume_front(
"atomic.")) {
1310 if (Name.starts_with(
"inc") || Name.starts_with(
"dec") ||
1311 Name.starts_with(
"cond.sub") || Name.starts_with(
"csub")) {
1320 switch (
F->getIntrinsicID()) {
1324 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1325 if (
F->arg_size() == 7) {
1330 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1331 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1332 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1333 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1334 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1335 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1336 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1337 if (
F->arg_size() == 8) {
1344 if (Name.consume_front(
"ds.") || Name.consume_front(
"global.atomic.") ||
1345 Name.consume_front(
"flat.atomic.")) {
1346 if (Name.starts_with(
"fadd") ||
1348 (Name.starts_with(
"fmin") && !Name.starts_with(
"fmin.num")) ||
1349 (Name.starts_with(
"fmax") && !Name.starts_with(
"fmax.num"))) {
1357 if (Name.starts_with(
"ldexp.")) {
1360 F->getParent(), Intrinsic::ldexp,
1361 {F->getReturnType(), F->getArg(1)->getType()});
1370 if (
F->arg_size() == 1) {
1371 if (Name.consume_front(
"convert.")) {
1385 F->arg_begin()->getType());
1390 if (
F->arg_size() == 2 && Name ==
"coro.end") {
1393 Intrinsic::coro_end);
1400 if (Name.consume_front(
"dbg.")) {
1402 if (CanUpgradeDebugIntrinsicsToRecords) {
1403 if (Name ==
"addr" || Name ==
"value" || Name ==
"assign" ||
1404 Name ==
"declare" || Name ==
"label") {
1413 if (Name ==
"addr" || (Name ==
"value" &&
F->arg_size() == 4)) {
1416 Intrinsic::dbg_value);
1423 if (Name.consume_front(
"experimental.vector.")) {
1429 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1430 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1431 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1432 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1433 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1435 Intrinsic::vector_partial_reduce_add)
1438 const auto *FT =
F->getFunctionType();
1440 if (
ID == Intrinsic::vector_extract ||
1441 ID == Intrinsic::vector_interleave2)
1444 if (
ID != Intrinsic::vector_interleave2)
1446 if (
ID == Intrinsic::vector_insert ||
1447 ID == Intrinsic::vector_partial_reduce_add)
1455 if (Name.consume_front(
"reduce.")) {
1457 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1458 if (R.match(Name, &
Groups))
1460 .
Case(
"add", Intrinsic::vector_reduce_add)
1461 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1462 .
Case(
"and", Intrinsic::vector_reduce_and)
1463 .
Case(
"or", Intrinsic::vector_reduce_or)
1464 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1465 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1466 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1467 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1468 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1469 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1470 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1475 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1480 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1481 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1486 auto Args =
F->getFunctionType()->params();
1488 {Args[V2 ? 1 : 0]});
1494 if (Name.consume_front(
"splice"))
1498 if (Name.consume_front(
"experimental.stepvector.")) {
1502 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1507 if (Name.starts_with(
"flt.rounds")) {
1510 Intrinsic::get_rounding);
1515 if (Name.starts_with(
"invariant.group.barrier")) {
1517 auto Args =
F->getFunctionType()->params();
1518 Type* ObjectPtr[1] = {Args[0]};
1521 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1526 if ((Name.starts_with(
"lifetime.start") ||
1527 Name.starts_with(
"lifetime.end")) &&
1528 F->arg_size() == 2) {
1530 ? Intrinsic::lifetime_start
1531 : Intrinsic::lifetime_end;
1534 F->getArg(0)->getType());
1543 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1544 .StartsWith(
"memmove.", Intrinsic::memmove)
1546 if (
F->arg_size() == 5) {
1550 F->getFunctionType()->params().slice(0, 3);
1556 if (Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1559 const auto *FT =
F->getFunctionType();
1560 Type *ParamTypes[2] = {
1561 FT->getParamType(0),
1565 Intrinsic::memset, ParamTypes);
1571 .
StartsWith(
"masked.load", Intrinsic::masked_load)
1572 .
StartsWith(
"masked.gather", Intrinsic::masked_gather)
1573 .
StartsWith(
"masked.store", Intrinsic::masked_store)
1574 .
StartsWith(
"masked.scatter", Intrinsic::masked_scatter)
1576 if (MaskedID &&
F->arg_size() == 4) {
1578 if (MaskedID == Intrinsic::masked_load ||
1579 MaskedID == Intrinsic::masked_gather) {
1581 F->getParent(), MaskedID,
1582 {F->getReturnType(), F->getArg(0)->getType()});
1586 F->getParent(), MaskedID,
1587 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1593 if (Name.consume_front(
"nvvm.")) {
1595 if (
F->arg_size() == 1) {
1598 .
Cases({
"brev32",
"brev64"}, Intrinsic::bitreverse)
1599 .Case(
"clz.i", Intrinsic::ctlz)
1600 .
Case(
"popc.i", Intrinsic::ctpop)
1604 {F->getReturnType()});
1607 }
else if (
F->arg_size() == 2) {
1610 .
Cases({
"max.s",
"max.i",
"max.ll"}, Intrinsic::smax)
1611 .Cases({
"min.s",
"min.i",
"min.ll"}, Intrinsic::smin)
1612 .Cases({
"max.us",
"max.ui",
"max.ull"}, Intrinsic::umax)
1613 .Cases({
"min.us",
"min.ui",
"min.ull"}, Intrinsic::umin)
1617 {F->getReturnType()});
1623 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1651 bool Expand =
false;
1652 if (Name.consume_front(
"abs."))
1655 Name ==
"i" || Name ==
"ll" || Name ==
"bf16" || Name ==
"bf16x2";
1656 else if (Name.consume_front(
"fabs."))
1658 Expand = Name ==
"f" || Name ==
"ftz.f" || Name ==
"d";
1659 else if (Name.consume_front(
"ex2.approx."))
1662 Name ==
"f" || Name ==
"ftz.f" || Name ==
"d" || Name ==
"f16x2";
1663 else if (Name.consume_front(
"atomic.load."))
1672 else if (Name.consume_front(
"bitcast."))
1675 Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" || Name ==
"d2ll";
1676 else if (Name.consume_front(
"rotate."))
1678 Expand = Name ==
"b32" || Name ==
"b64" || Name ==
"right.b64";
1679 else if (Name.consume_front(
"ptr.gen.to."))
1682 else if (Name.consume_front(
"ptr."))
1685 else if (Name.consume_front(
"ldg.global."))
1687 Expand = (Name.starts_with(
"i.") || Name.starts_with(
"f.") ||
1688 Name.starts_with(
"p."));
1691 .
Case(
"barrier0",
true)
1692 .
Case(
"barrier.n",
true)
1693 .
Case(
"barrier.sync.cnt",
true)
1694 .
Case(
"barrier.sync",
true)
1695 .
Case(
"barrier",
true)
1696 .
Case(
"bar.sync",
true)
1697 .
Case(
"barrier0.popc",
true)
1698 .
Case(
"barrier0.and",
true)
1699 .
Case(
"barrier0.or",
true)
1700 .
Case(
"clz.ll",
true)
1701 .
Case(
"popc.ll",
true)
1703 .
Case(
"swap.lo.hi.b64",
true)
1704 .
Case(
"tanh.approx.f32",
true)
1716 if (Name.starts_with(
"objectsize.")) {
1717 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1718 if (
F->arg_size() == 2 ||
F->arg_size() == 3) {
1721 Intrinsic::objectsize, Tys);
1728 if (Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1731 F->getParent(), Intrinsic::ptr_annotation,
1732 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1738 if (Name.consume_front(
"riscv.")) {
1741 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1742 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1743 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1744 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1747 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1760 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1761 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1770 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1771 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1772 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1773 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1778 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1787 if (Name ==
"clmul.i32" || Name ==
"clmul.i64") {
1789 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1798 if (Name ==
"stackprotectorcheck") {
1805 if (Name ==
"thread.pointer") {
1807 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
1813 if (Name ==
"var.annotation" &&
F->arg_size() == 4) {
1816 F->getParent(), Intrinsic::var_annotation,
1817 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1820 if (Name.consume_front(
"vector.splice")) {
1821 if (Name.starts_with(
".left") || Name.starts_with(
".right"))
1829 if (Name.consume_front(
"wasm.")) {
1832 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1833 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1834 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1839 F->getReturnType());
1843 if (Name.consume_front(
"dot.i8x16.i7x16.")) {
1845 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1847 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1866 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1876 std::string
Name =
F->getName().str();
1879 Name,
F->getParent());
1890 if (Result != std::nullopt) {
1903 bool CanUpgradeDebugIntrinsicsToRecords) {
1923 GV->
getName() ==
"llvm.global_dtors")) ||
1938 unsigned N =
Init->getNumOperands();
1939 std::vector<Constant *> NewCtors(
N);
1940 for (
unsigned i = 0; i !=
N; ++i) {
1943 Ctor->getAggregateElement(1),
1957 unsigned NumElts = ResultTy->getNumElements() * 8;
1961 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1971 for (
unsigned l = 0; l != NumElts; l += 16)
1972 for (
unsigned i = 0; i != 16; ++i) {
1973 unsigned Idx = NumElts + i - Shift;
1975 Idx -= NumElts - 16;
1976 Idxs[l + i] = Idx + l;
1979 Res = Builder.CreateShuffleVector(Res,
Op,
ArrayRef(Idxs, NumElts));
1983 return Builder.CreateBitCast(Res, ResultTy,
"cast");
1991 unsigned NumElts = ResultTy->getNumElements() * 8;
1995 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
2005 for (
unsigned l = 0; l != NumElts; l += 16)
2006 for (
unsigned i = 0; i != 16; ++i) {
2007 unsigned Idx = i + Shift;
2009 Idx += NumElts - 16;
2010 Idxs[l + i] = Idx + l;
2013 Res = Builder.CreateShuffleVector(
Op, Res,
ArrayRef(Idxs, NumElts));
2017 return Builder.CreateBitCast(Res, ResultTy,
"cast");
2025 Mask = Builder.CreateBitCast(Mask, MaskTy);
2031 for (
unsigned i = 0; i != NumElts; ++i)
2033 Mask = Builder.CreateShuffleVector(Mask, Mask,
ArrayRef(Indices, NumElts),
2044 if (
C->isAllOnesValue())
2049 return Builder.CreateSelect(Mask, Op0, Op1);
2056 if (
C->isAllOnesValue())
2060 Mask->getType()->getIntegerBitWidth());
2061 Mask = Builder.CreateBitCast(Mask, MaskTy);
2062 Mask = Builder.CreateExtractElement(Mask, (
uint64_t)0);
2063 return Builder.CreateSelect(Mask, Op0, Op1);
2076 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
2077 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
2082 ShiftVal &= (NumElts - 1);
2091 if (ShiftVal > 16) {
2099 for (
unsigned l = 0; l < NumElts; l += 16) {
2100 for (
unsigned i = 0; i != 16; ++i) {
2101 unsigned Idx = ShiftVal + i;
2102 if (!IsVALIGN && Idx >= 16)
2103 Idx += NumElts - 16;
2104 Indices[l + i] = Idx + l;
2109 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
2115 bool ZeroMask,
bool IndexForm) {
2118 unsigned EltWidth = Ty->getScalarSizeInBits();
2119 bool IsFloat = Ty->isFPOrFPVectorTy();
2121 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2122 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2123 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2124 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2125 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2126 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2127 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2128 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2129 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2130 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2131 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2132 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2133 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2134 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2135 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2136 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2137 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2138 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2139 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2140 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2141 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2142 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2143 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2144 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2145 else if (VecWidth == 128 && EltWidth == 16)
2146 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2147 else if (VecWidth == 256 && EltWidth == 16)
2148 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2149 else if (VecWidth == 512 && EltWidth == 16)
2150 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2151 else if (VecWidth == 128 && EltWidth == 8)
2152 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2153 else if (VecWidth == 256 && EltWidth == 8)
2154 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2155 else if (VecWidth == 512 && EltWidth == 8)
2156 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2167 Value *V = Builder.CreateIntrinsic(IID, Args);
2179 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2190 bool IsRotateRight) {
2200 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2201 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2204 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2205 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2250 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2255 bool IsShiftRight,
bool ZeroMask) {
2269 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2270 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2273 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2274 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2289 const Align Alignment =
2291 ?
Align(
Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2296 if (
C->isAllOnesValue())
2297 return Builder.CreateAlignedStore(
Data, Ptr, Alignment);
2302 return Builder.CreateMaskedStore(
Data, Ptr, Alignment, Mask);
2308 const Align Alignment =
2317 if (
C->isAllOnesValue())
2318 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2323 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2329 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2330 {Op0, Builder.getInt1(
false)});
2345 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2346 LHS = Builder.CreateShl(
LHS, ShiftAmt);
2347 LHS = Builder.CreateAShr(
LHS, ShiftAmt);
2348 RHS = Builder.CreateShl(
RHS, ShiftAmt);
2349 RHS = Builder.CreateAShr(
RHS, ShiftAmt);
2352 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2353 LHS = Builder.CreateAnd(
LHS, Mask);
2354 RHS = Builder.CreateAnd(
RHS, Mask);
2371 if (!
C || !
C->isAllOnesValue())
2372 Vec = Builder.CreateAnd(Vec,
getX86MaskVec(Builder, Mask, NumElts));
2377 for (
unsigned i = 0; i != NumElts; ++i)
2379 for (
unsigned i = NumElts; i != 8; ++i)
2380 Indices[i] = NumElts + i % NumElts;
2381 Vec = Builder.CreateShuffleVector(Vec,
2385 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2389 unsigned CC,
bool Signed) {
2397 }
else if (CC == 7) {
2433 Value* AndNode = Builder.CreateAnd(Mask,
APInt(8, 1));
2434 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2436 Value* Extract2 = Builder.CreateExtractElement(Src, (
uint64_t)0);
2437 Value*
Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2446 return Builder.CreateSExt(Mask, ReturnOp,
"vpmovm2");
2452 Name = Name.substr(12);
2457 if (Name.starts_with(
"max.p")) {
2458 if (VecWidth == 128 && EltWidth == 32)
2459 IID = Intrinsic::x86_sse_max_ps;
2460 else if (VecWidth == 128 && EltWidth == 64)
2461 IID = Intrinsic::x86_sse2_max_pd;
2462 else if (VecWidth == 256 && EltWidth == 32)
2463 IID = Intrinsic::x86_avx_max_ps_256;
2464 else if (VecWidth == 256 && EltWidth == 64)
2465 IID = Intrinsic::x86_avx_max_pd_256;
2468 }
else if (Name.starts_with(
"min.p")) {
2469 if (VecWidth == 128 && EltWidth == 32)
2470 IID = Intrinsic::x86_sse_min_ps;
2471 else if (VecWidth == 128 && EltWidth == 64)
2472 IID = Intrinsic::x86_sse2_min_pd;
2473 else if (VecWidth == 256 && EltWidth == 32)
2474 IID = Intrinsic::x86_avx_min_ps_256;
2475 else if (VecWidth == 256 && EltWidth == 64)
2476 IID = Intrinsic::x86_avx_min_pd_256;
2479 }
else if (Name.starts_with(
"pshuf.b.")) {
2480 if (VecWidth == 128)
2481 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2482 else if (VecWidth == 256)
2483 IID = Intrinsic::x86_avx2_pshuf_b;
2484 else if (VecWidth == 512)
2485 IID = Intrinsic::x86_avx512_pshuf_b_512;
2488 }
else if (Name.starts_with(
"pmul.hr.sw.")) {
2489 if (VecWidth == 128)
2490 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2491 else if (VecWidth == 256)
2492 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2493 else if (VecWidth == 512)
2494 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2497 }
else if (Name.starts_with(
"pmulh.w.")) {
2498 if (VecWidth == 128)
2499 IID = Intrinsic::x86_sse2_pmulh_w;
2500 else if (VecWidth == 256)
2501 IID = Intrinsic::x86_avx2_pmulh_w;
2502 else if (VecWidth == 512)
2503 IID = Intrinsic::x86_avx512_pmulh_w_512;
2506 }
else if (Name.starts_with(
"pmulhu.w.")) {
2507 if (VecWidth == 128)
2508 IID = Intrinsic::x86_sse2_pmulhu_w;
2509 else if (VecWidth == 256)
2510 IID = Intrinsic::x86_avx2_pmulhu_w;
2511 else if (VecWidth == 512)
2512 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2515 }
else if (Name.starts_with(
"pmaddw.d.")) {
2516 if (VecWidth == 128)
2517 IID = Intrinsic::x86_sse2_pmadd_wd;
2518 else if (VecWidth == 256)
2519 IID = Intrinsic::x86_avx2_pmadd_wd;
2520 else if (VecWidth == 512)
2521 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2524 }
else if (Name.starts_with(
"pmaddubs.w.")) {
2525 if (VecWidth == 128)
2526 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2527 else if (VecWidth == 256)
2528 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2529 else if (VecWidth == 512)
2530 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2533 }
else if (Name.starts_with(
"packsswb.")) {
2534 if (VecWidth == 128)
2535 IID = Intrinsic::x86_sse2_packsswb_128;
2536 else if (VecWidth == 256)
2537 IID = Intrinsic::x86_avx2_packsswb;
2538 else if (VecWidth == 512)
2539 IID = Intrinsic::x86_avx512_packsswb_512;
2542 }
else if (Name.starts_with(
"packssdw.")) {
2543 if (VecWidth == 128)
2544 IID = Intrinsic::x86_sse2_packssdw_128;
2545 else if (VecWidth == 256)
2546 IID = Intrinsic::x86_avx2_packssdw;
2547 else if (VecWidth == 512)
2548 IID = Intrinsic::x86_avx512_packssdw_512;
2551 }
else if (Name.starts_with(
"packuswb.")) {
2552 if (VecWidth == 128)
2553 IID = Intrinsic::x86_sse2_packuswb_128;
2554 else if (VecWidth == 256)
2555 IID = Intrinsic::x86_avx2_packuswb;
2556 else if (VecWidth == 512)
2557 IID = Intrinsic::x86_avx512_packuswb_512;
2560 }
else if (Name.starts_with(
"packusdw.")) {
2561 if (VecWidth == 128)
2562 IID = Intrinsic::x86_sse41_packusdw;
2563 else if (VecWidth == 256)
2564 IID = Intrinsic::x86_avx2_packusdw;
2565 else if (VecWidth == 512)
2566 IID = Intrinsic::x86_avx512_packusdw_512;
2569 }
else if (Name.starts_with(
"vpermilvar.")) {
2570 if (VecWidth == 128 && EltWidth == 32)
2571 IID = Intrinsic::x86_avx_vpermilvar_ps;
2572 else if (VecWidth == 128 && EltWidth == 64)
2573 IID = Intrinsic::x86_avx_vpermilvar_pd;
2574 else if (VecWidth == 256 && EltWidth == 32)
2575 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2576 else if (VecWidth == 256 && EltWidth == 64)
2577 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2578 else if (VecWidth == 512 && EltWidth == 32)
2579 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2580 else if (VecWidth == 512 && EltWidth == 64)
2581 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2584 }
else if (Name ==
"cvtpd2dq.256") {
2585 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2586 }
else if (Name ==
"cvtpd2ps.256") {
2587 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2588 }
else if (Name ==
"cvttpd2dq.256") {
2589 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2590 }
else if (Name ==
"cvttps2dq.128") {
2591 IID = Intrinsic::x86_sse2_cvttps2dq;
2592 }
else if (Name ==
"cvttps2dq.256") {
2593 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2594 }
else if (Name.starts_with(
"permvar.")) {
2596 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2597 IID = Intrinsic::x86_avx2_permps;
2598 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2599 IID = Intrinsic::x86_avx2_permd;
2600 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2601 IID = Intrinsic::x86_avx512_permvar_df_256;
2602 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2603 IID = Intrinsic::x86_avx512_permvar_di_256;
2604 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2605 IID = Intrinsic::x86_avx512_permvar_sf_512;
2606 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2607 IID = Intrinsic::x86_avx512_permvar_si_512;
2608 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2609 IID = Intrinsic::x86_avx512_permvar_df_512;
2610 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2611 IID = Intrinsic::x86_avx512_permvar_di_512;
2612 else if (VecWidth == 128 && EltWidth == 16)
2613 IID = Intrinsic::x86_avx512_permvar_hi_128;
2614 else if (VecWidth == 256 && EltWidth == 16)
2615 IID = Intrinsic::x86_avx512_permvar_hi_256;
2616 else if (VecWidth == 512 && EltWidth == 16)
2617 IID = Intrinsic::x86_avx512_permvar_hi_512;
2618 else if (VecWidth == 128 && EltWidth == 8)
2619 IID = Intrinsic::x86_avx512_permvar_qi_128;
2620 else if (VecWidth == 256 && EltWidth == 8)
2621 IID = Intrinsic::x86_avx512_permvar_qi_256;
2622 else if (VecWidth == 512 && EltWidth == 8)
2623 IID = Intrinsic::x86_avx512_permvar_qi_512;
2626 }
else if (Name.starts_with(
"dbpsadbw.")) {
2627 if (VecWidth == 128)
2628 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2629 else if (VecWidth == 256)
2630 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2631 else if (VecWidth == 512)
2632 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2635 }
else if (Name.starts_with(
"pmultishift.qb.")) {
2636 if (VecWidth == 128)
2637 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2638 else if (VecWidth == 256)
2639 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2640 else if (VecWidth == 512)
2641 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2644 }
else if (Name.starts_with(
"conflict.")) {
2645 if (Name[9] ==
'd' && VecWidth == 128)
2646 IID = Intrinsic::x86_avx512_conflict_d_128;
2647 else if (Name[9] ==
'd' && VecWidth == 256)
2648 IID = Intrinsic::x86_avx512_conflict_d_256;
2649 else if (Name[9] ==
'd' && VecWidth == 512)
2650 IID = Intrinsic::x86_avx512_conflict_d_512;
2651 else if (Name[9] ==
'q' && VecWidth == 128)
2652 IID = Intrinsic::x86_avx512_conflict_q_128;
2653 else if (Name[9] ==
'q' && VecWidth == 256)
2654 IID = Intrinsic::x86_avx512_conflict_q_256;
2655 else if (Name[9] ==
'q' && VecWidth == 512)
2656 IID = Intrinsic::x86_avx512_conflict_q_512;
2659 }
else if (Name.starts_with(
"pavg.")) {
2660 if (Name[5] ==
'b' && VecWidth == 128)
2661 IID = Intrinsic::x86_sse2_pavg_b;
2662 else if (Name[5] ==
'b' && VecWidth == 256)
2663 IID = Intrinsic::x86_avx2_pavg_b;
2664 else if (Name[5] ==
'b' && VecWidth == 512)
2665 IID = Intrinsic::x86_avx512_pavg_b_512;
2666 else if (Name[5] ==
'w' && VecWidth == 128)
2667 IID = Intrinsic::x86_sse2_pavg_w;
2668 else if (Name[5] ==
'w' && VecWidth == 256)
2669 IID = Intrinsic::x86_avx2_pavg_w;
2670 else if (Name[5] ==
'w' && VecWidth == 512)
2671 IID = Intrinsic::x86_avx512_pavg_w_512;
2680 Rep = Builder.CreateIntrinsic(IID, Args);
2691 if (AsmStr->find(
"mov\tfp") == 0 &&
2692 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2693 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2694 AsmStr->replace(Pos, 1,
";");
2700 Value *Rep =
nullptr;
2702 if (Name ==
"abs.i" || Name ==
"abs.ll") {
2704 Rep = Builder.CreateIntrinsic(Intrinsic::abs, {Arg->
getType()},
2705 {Arg, Builder.getTrue()},
2707 }
else if (Name ==
"abs.bf16" || Name ==
"abs.bf16x2") {
2708 Type *Ty = (Name ==
"abs.bf16")
2712 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2713 Rep = Builder.CreateBitCast(Abs, CI->
getType());
2714 }
else if (Name ==
"fabs.f" || Name ==
"fabs.ftz.f" || Name ==
"fabs.d") {
2715 Intrinsic::ID IID = (Name ==
"fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2716 : Intrinsic::nvvm_fabs;
2717 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2718 }
else if (Name.consume_front(
"ex2.approx.")) {
2720 Intrinsic::ID IID = Name.starts_with(
"ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2721 : Intrinsic::nvvm_ex2_approx;
2722 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2723 }
else if (Name.starts_with(
"atomic.load.add.f32.p") ||
2724 Name.starts_with(
"atomic.load.add.f64.p")) {
2727 Rep = Builder.CreateAtomicRMW(
2733 }
else if (Name.starts_with(
"atomic.load.inc.32.p") ||
2734 Name.starts_with(
"atomic.load.dec.32.p")) {
2739 Rep = Builder.CreateAtomicRMW(
2743 }
else if (Name ==
"clz.ll") {
2746 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->
getType()},
2747 {Arg, Builder.getFalse()},
2749 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(),
"ctlz.trunc");
2750 }
else if (Name ==
"popc.ll") {
2754 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->
getType()},
2755 Arg,
nullptr,
"ctpop");
2756 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(),
"ctpop.trunc");
2757 }
else if (Name ==
"h2f") {
2759 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
2760 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2761 }
else if (Name.consume_front(
"bitcast.") &&
2762 (Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" ||
2765 }
else if (Name ==
"rotate.b32") {
2768 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2769 {Arg, Arg, ShiftAmt});
2770 }
else if (Name ==
"rotate.b64") {
2774 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2775 {Arg, Arg, ZExtShiftAmt});
2776 }
else if (Name ==
"rotate.right.b64") {
2780 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2781 {Arg, Arg, ZExtShiftAmt});
2782 }
else if (Name ==
"swap.lo.hi.b64") {
2785 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2786 {Arg, Arg, Builder.getInt64(32)});
2787 }
else if ((Name.consume_front(
"ptr.gen.to.") &&
2790 Name.starts_with(
".to.gen"))) {
2792 }
else if (Name.consume_front(
"ldg.global")) {
2796 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2799 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2801 }
else if (Name ==
"tanh.approx.f32") {
2805 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->
getArgOperand(0),
2807 }
else if (Name ==
"barrier0" || Name ==
"barrier.n" || Name ==
"bar.sync") {
2809 Name.ends_with(
'0') ? Builder.getInt32(0) : CI->
getArgOperand(0);
2810 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2812 }
else if (Name ==
"barrier") {
2813 Rep = Builder.CreateIntrinsic(
2814 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2816 }
else if (Name ==
"barrier.sync") {
2817 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2819 }
else if (Name ==
"barrier.sync.cnt") {
2820 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2822 }
else if (Name ==
"barrier0.popc" || Name ==
"barrier0.and" ||
2823 Name ==
"barrier0.or") {
2825 C = Builder.CreateICmpNE(
C, Builder.getInt32(0));
2829 .
Case(
"barrier0.popc",
2830 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2831 .
Case(
"barrier0.and",
2832 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2833 .
Case(
"barrier0.or",
2834 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2835 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0),
C});
2836 Rep = Builder.CreateZExt(Bar, CI->
getType());
2840 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2850 ? Builder.CreateBitCast(Arg, NewType)
2853 Rep = Builder.CreateCall(NewFn, Args);
2854 if (
F->getReturnType()->isIntegerTy())
2855 Rep = Builder.CreateBitCast(Rep,
F->getReturnType());
2865 Value *Rep =
nullptr;
2867 if (Name.starts_with(
"sse4a.movnt.")) {
2879 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2882 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2883 }
else if (Name.starts_with(
"avx.movnt.") ||
2884 Name.starts_with(
"avx512.storent.")) {
2896 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2897 }
else if (Name ==
"sse2.storel.dq") {
2902 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2903 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2904 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2905 }
else if (Name.starts_with(
"sse.storeu.") ||
2906 Name.starts_with(
"sse2.storeu.") ||
2907 Name.starts_with(
"avx.storeu.")) {
2910 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
2911 }
else if (Name ==
"avx512.mask.store.ss") {
2915 }
else if (Name.starts_with(
"avx512.mask.store")) {
2917 bool Aligned = Name[17] !=
'u';
2920 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
2923 bool CmpEq = Name[9] ==
'e';
2926 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
2927 }
else if (Name.starts_with(
"avx512.broadcastm")) {
2934 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2935 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
2937 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
2938 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
2939 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
2940 }
else if (Name.starts_with(
"avx.sqrt.p") ||
2941 Name.starts_with(
"sse2.sqrt.p") ||
2942 Name.starts_with(
"sse.sqrt.p")) {
2943 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2944 {CI->getArgOperand(0)});
2945 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
2949 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
2950 : Intrinsic::x86_avx512_sqrt_pd_512;
2953 Rep = Builder.CreateIntrinsic(IID, Args);
2955 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2956 {CI->getArgOperand(0)});
2960 }
else if (Name.starts_with(
"avx512.ptestm") ||
2961 Name.starts_with(
"avx512.ptestnm")) {
2965 Rep = Builder.CreateAnd(Op0, Op1);
2971 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2973 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
2976 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
2979 }
else if (Name.starts_with(
"avx512.kunpck")) {
2984 for (
unsigned i = 0; i != NumElts; ++i)
2993 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
2994 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2995 }
else if (Name ==
"avx512.kand.w") {
2998 Rep = Builder.CreateAnd(
LHS,
RHS);
2999 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3000 }
else if (Name ==
"avx512.kandn.w") {
3003 LHS = Builder.CreateNot(
LHS);
3004 Rep = Builder.CreateAnd(
LHS,
RHS);
3005 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3006 }
else if (Name ==
"avx512.kor.w") {
3009 Rep = Builder.CreateOr(
LHS,
RHS);
3010 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3011 }
else if (Name ==
"avx512.kxor.w") {
3014 Rep = Builder.CreateXor(
LHS,
RHS);
3015 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3016 }
else if (Name ==
"avx512.kxnor.w") {
3019 LHS = Builder.CreateNot(
LHS);
3020 Rep = Builder.CreateXor(
LHS,
RHS);
3021 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3022 }
else if (Name ==
"avx512.knot.w") {
3024 Rep = Builder.CreateNot(Rep);
3025 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3026 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
3029 Rep = Builder.CreateOr(
LHS,
RHS);
3030 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3032 if (Name[14] ==
'c')
3036 Rep = Builder.CreateICmpEQ(Rep,
C);
3037 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3038 }
else if (Name ==
"sse.add.ss" || Name ==
"sse2.add.sd" ||
3039 Name ==
"sse.sub.ss" || Name ==
"sse2.sub.sd" ||
3040 Name ==
"sse.mul.ss" || Name ==
"sse2.mul.sd" ||
3041 Name ==
"sse.div.ss" || Name ==
"sse2.div.sd") {
3044 ConstantInt::get(I32Ty, 0));
3046 ConstantInt::get(I32Ty, 0));
3048 if (Name.contains(
".add."))
3049 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3050 else if (Name.contains(
".sub."))
3051 EltOp = Builder.CreateFSub(Elt0, Elt1);
3052 else if (Name.contains(
".mul."))
3053 EltOp = Builder.CreateFMul(Elt0, Elt1);
3055 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3056 Rep = Builder.CreateInsertElement(CI->
getArgOperand(0), EltOp,
3057 ConstantInt::get(I32Ty, 0));
3058 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
3060 bool CmpEq = Name[16] ==
'e';
3062 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
3071 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3074 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3077 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3084 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
3089 if (VecWidth == 128 && EltWidth == 32)
3090 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3091 else if (VecWidth == 256 && EltWidth == 32)
3092 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3093 else if (VecWidth == 512 && EltWidth == 32)
3094 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3095 else if (VecWidth == 128 && EltWidth == 64)
3096 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3097 else if (VecWidth == 256 && EltWidth == 64)
3098 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3099 else if (VecWidth == 512 && EltWidth == 64)
3100 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3107 }
else if (Name.starts_with(
"avx512.cmp.p")) {
3109 Type *OpTy = Args[0]->getType();
3113 if (VecWidth == 128 && EltWidth == 32)
3114 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3115 else if (VecWidth == 256 && EltWidth == 32)
3116 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3117 else if (VecWidth == 512 && EltWidth == 32)
3118 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3119 else if (VecWidth == 128 && EltWidth == 64)
3120 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3121 else if (VecWidth == 256 && EltWidth == 64)
3122 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3123 else if (VecWidth == 512 && EltWidth == 64)
3124 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3129 if (VecWidth == 512)
3131 Args.push_back(Mask);
3133 Rep = Builder.CreateIntrinsic(IID, Args);
3134 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
3138 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
3141 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
3142 Name.starts_with(
"avx512.cvtw2mask.") ||
3143 Name.starts_with(
"avx512.cvtd2mask.") ||
3144 Name.starts_with(
"avx512.cvtq2mask.")) {
3149 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
3150 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
3151 Name.starts_with(
"avx512.mask.pabs")) {
3153 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
3154 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
3155 Name.starts_with(
"avx512.mask.pmaxs")) {
3157 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
3158 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
3159 Name.starts_with(
"avx512.mask.pmaxu")) {
3161 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
3162 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
3163 Name.starts_with(
"avx512.mask.pmins")) {
3165 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
3166 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
3167 Name.starts_with(
"avx512.mask.pminu")) {
3169 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
3170 Name ==
"avx512.pmulu.dq.512" ||
3171 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
3173 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
3174 Name ==
"avx512.pmul.dq.512" ||
3175 Name.starts_with(
"avx512.mask.pmul.dq.")) {
3177 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
3178 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
3183 }
else if (Name ==
"avx512.cvtusi2sd") {
3188 }
else if (Name ==
"sse2.cvtss2sd") {
3190 Rep = Builder.CreateFPExt(
3193 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
3194 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
3195 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
3196 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
3197 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
3198 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
3199 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
3200 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
3201 Name ==
"avx512.mask.cvtqq2ps.256" ||
3202 Name ==
"avx512.mask.cvtqq2ps.512" ||
3203 Name ==
"avx512.mask.cvtuqq2ps.256" ||
3204 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
3205 Name ==
"avx.cvt.ps2.pd.256" ||
3206 Name ==
"avx512.mask.cvtps2pd.128" ||
3207 Name ==
"avx512.mask.cvtps2pd.256") {
3212 unsigned NumDstElts = DstTy->getNumElements();
3214 assert(NumDstElts == 2 &&
"Unexpected vector size");
3215 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
3218 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3219 bool IsUnsigned = Name.contains(
"cvtu");
3221 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
3225 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3226 : Intrinsic::x86_avx512_sitofp_round;
3227 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3230 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
3231 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3237 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3238 Name.starts_with(
"vcvtph2ps.")) {
3242 unsigned NumDstElts = DstTy->getNumElements();
3243 if (NumDstElts != SrcTy->getNumElements()) {
3244 assert(NumDstElts == 4 &&
"Unexpected vector size");
3245 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3247 Rep = Builder.CreateBitCast(
3249 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3253 }
else if (Name.starts_with(
"avx512.mask.load")) {
3255 bool Aligned = Name[16] !=
'u';
3258 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3261 ResultTy->getNumElements());
3263 Rep = Builder.CreateIntrinsic(
3264 Intrinsic::masked_expandload, ResultTy,
3266 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3272 Rep = Builder.CreateIntrinsic(
3273 Intrinsic::masked_compressstore, ResultTy,
3275 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3276 Name.starts_with(
"avx512.mask.expand.")) {
3280 ResultTy->getNumElements());
3282 bool IsCompress = Name[12] ==
'c';
3283 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3284 : Intrinsic::x86_avx512_mask_expand;
3285 Rep = Builder.CreateIntrinsic(
3287 }
else if (Name.starts_with(
"xop.vpcom")) {
3289 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3290 Name.ends_with(
"uq"))
3292 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3293 Name.ends_with(
"d") || Name.ends_with(
"q"))
3302 Name = Name.substr(9);
3303 if (Name.starts_with(
"lt"))
3305 else if (Name.starts_with(
"le"))
3307 else if (Name.starts_with(
"gt"))
3309 else if (Name.starts_with(
"ge"))
3311 else if (Name.starts_with(
"eq"))
3313 else if (Name.starts_with(
"ne"))
3315 else if (Name.starts_with(
"false"))
3317 else if (Name.starts_with(
"true"))
3324 }
else if (Name.starts_with(
"xop.vpcmov")) {
3326 Value *NotSel = Builder.CreateNot(Sel);
3329 Rep = Builder.CreateOr(Sel0, Sel1);
3330 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3331 Name.starts_with(
"avx512.mask.prol")) {
3333 }
else if (Name.starts_with(
"avx512.pror") ||
3334 Name.starts_with(
"avx512.mask.pror")) {
3336 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3337 Name.starts_with(
"avx512.mask.vpshld") ||
3338 Name.starts_with(
"avx512.maskz.vpshld")) {
3339 bool ZeroMask = Name[11] ==
'z';
3341 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3342 Name.starts_with(
"avx512.mask.vpshrd") ||
3343 Name.starts_with(
"avx512.maskz.vpshrd")) {
3344 bool ZeroMask = Name[11] ==
'z';
3346 }
else if (Name ==
"sse42.crc32.64.8") {
3349 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3351 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3352 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3353 Name.starts_with(
"avx512.vbroadcast.s")) {
3356 Type *EltTy = VecTy->getElementType();
3357 unsigned EltNum = VecTy->getNumElements();
3361 for (
unsigned I = 0;
I < EltNum; ++
I)
3362 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3363 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3364 Name.starts_with(
"sse41.pmovzx") ||
3365 Name.starts_with(
"avx2.pmovsx") ||
3366 Name.starts_with(
"avx2.pmovzx") ||
3367 Name.starts_with(
"avx512.mask.pmovsx") ||
3368 Name.starts_with(
"avx512.mask.pmovzx")) {
3370 unsigned NumDstElts = DstTy->getNumElements();
3374 for (
unsigned i = 0; i != NumDstElts; ++i)
3379 bool DoSext = Name.contains(
"pmovsx");
3381 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3386 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3387 Name ==
"avx512.mask.pmov.qd.512" ||
3388 Name ==
"avx512.mask.pmov.wb.256" ||
3389 Name ==
"avx512.mask.pmov.wb.512") {
3394 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3395 Name ==
"avx2.vbroadcasti128") {
3401 if (NumSrcElts == 2)
3402 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3404 Rep = Builder.CreateShuffleVector(Load,
3406 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3407 Name.starts_with(
"avx512.mask.shuf.f")) {
3412 unsigned ControlBitsMask = NumLanes - 1;
3413 unsigned NumControlBits = NumLanes / 2;
3416 for (
unsigned l = 0; l != NumLanes; ++l) {
3417 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3419 if (l >= NumLanes / 2)
3420 LaneMask += NumLanes;
3421 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3422 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3428 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3429 Name.starts_with(
"avx512.mask.broadcasti")) {
3432 unsigned NumDstElts =
3436 for (
unsigned i = 0; i != NumDstElts; ++i)
3437 ShuffleMask[i] = i % NumSrcElts;
3443 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3444 Name.starts_with(
"avx2.vbroadcast") ||
3445 Name.starts_with(
"avx512.pbroadcast") ||
3446 Name.starts_with(
"avx512.mask.broadcast.s")) {
3453 Rep = Builder.CreateShuffleVector(
Op, M);
3458 }
else if (Name.starts_with(
"sse2.padds.") ||
3459 Name.starts_with(
"avx2.padds.") ||
3460 Name.starts_with(
"avx512.padds.") ||
3461 Name.starts_with(
"avx512.mask.padds.")) {
3463 }
else if (Name.starts_with(
"sse2.psubs.") ||
3464 Name.starts_with(
"avx2.psubs.") ||
3465 Name.starts_with(
"avx512.psubs.") ||
3466 Name.starts_with(
"avx512.mask.psubs.")) {
3468 }
else if (Name.starts_with(
"sse2.paddus.") ||
3469 Name.starts_with(
"avx2.paddus.") ||
3470 Name.starts_with(
"avx512.mask.paddus.")) {
3472 }
else if (Name.starts_with(
"sse2.psubus.") ||
3473 Name.starts_with(
"avx2.psubus.") ||
3474 Name.starts_with(
"avx512.mask.psubus.")) {
3476 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3481 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3485 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3490 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3495 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3496 Name ==
"avx512.psll.dq.512") {
3500 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3501 Name ==
"avx512.psrl.dq.512") {
3505 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3506 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3507 Name.starts_with(
"avx2.pblendd.")) {
3512 unsigned NumElts = VecTy->getNumElements();
3515 for (
unsigned i = 0; i != NumElts; ++i)
3516 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3518 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3519 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3520 Name ==
"avx2.vinserti128" ||
3521 Name.starts_with(
"avx512.mask.insert")) {
3525 unsigned DstNumElts =
3527 unsigned SrcNumElts =
3529 unsigned Scale = DstNumElts / SrcNumElts;
3536 for (
unsigned i = 0; i != SrcNumElts; ++i)
3538 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3539 Idxs[i] = SrcNumElts;
3540 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3554 for (
unsigned i = 0; i != DstNumElts; ++i)
3557 for (
unsigned i = 0; i != SrcNumElts; ++i)
3558 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3559 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3565 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3566 Name ==
"avx2.vextracti128" ||
3567 Name.starts_with(
"avx512.mask.vextract")) {
3570 unsigned DstNumElts =
3572 unsigned SrcNumElts =
3574 unsigned Scale = SrcNumElts / DstNumElts;
3581 for (
unsigned i = 0; i != DstNumElts; ++i) {
3582 Idxs[i] = i + (Imm * DstNumElts);
3584 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3590 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3591 Name.starts_with(
"avx512.mask.perm.di.")) {
3595 unsigned NumElts = VecTy->getNumElements();
3598 for (
unsigned i = 0; i != NumElts; ++i)
3599 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3601 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3606 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3618 unsigned HalfSize = NumElts / 2;
3630 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3631 for (
unsigned i = 0; i < HalfSize; ++i)
3632 ShuffleMask[i] = StartIndex + i;
3635 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3636 for (
unsigned i = 0; i < HalfSize; ++i)
3637 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3639 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3641 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3642 Name.starts_with(
"avx512.mask.vpermil.p") ||
3643 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3647 unsigned NumElts = VecTy->getNumElements();
3649 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3650 unsigned IdxMask = ((1 << IdxSize) - 1);
3656 for (
unsigned i = 0; i != NumElts; ++i)
3657 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3659 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3664 }
else if (Name ==
"sse2.pshufl.w" ||
3665 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3670 if (Name ==
"sse2.pshufl.w" && NumElts % 8 != 0)
3674 for (
unsigned l = 0; l != NumElts; l += 8) {
3675 for (
unsigned i = 0; i != 4; ++i)
3676 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3677 for (
unsigned i = 4; i != 8; ++i)
3678 Idxs[i + l] = i + l;
3681 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3686 }
else if (Name ==
"sse2.pshufh.w" ||
3687 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3692 if (Name ==
"sse2.pshufh.w" && NumElts % 8 != 0)
3696 for (
unsigned l = 0; l != NumElts; l += 8) {
3697 for (
unsigned i = 0; i != 4; ++i)
3698 Idxs[i + l] = i + l;
3699 for (
unsigned i = 0; i != 4; ++i)
3700 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3703 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3708 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3715 unsigned HalfLaneElts = NumLaneElts / 2;
3718 for (
unsigned i = 0; i != NumElts; ++i) {
3720 Idxs[i] = i - (i % NumLaneElts);
3722 if ((i % NumLaneElts) >= HalfLaneElts)
3726 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3729 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3733 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3734 Name.starts_with(
"avx512.mask.movshdup") ||
3735 Name.starts_with(
"avx512.mask.movsldup")) {
3741 if (Name.starts_with(
"avx512.mask.movshdup."))
3745 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3746 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3747 Idxs[i + l + 0] = i + l +
Offset;
3748 Idxs[i + l + 1] = i + l +
Offset;
3751 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3755 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3756 Name.starts_with(
"avx512.mask.unpckl.")) {
3763 for (
int l = 0; l != NumElts; l += NumLaneElts)
3764 for (
int i = 0; i != NumLaneElts; ++i)
3765 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3767 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3771 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3772 Name.starts_with(
"avx512.mask.unpckh.")) {
3779 for (
int l = 0; l != NumElts; l += NumLaneElts)
3780 for (
int i = 0; i != NumLaneElts; ++i)
3781 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3783 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3787 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3788 Name.starts_with(
"avx512.mask.pand.")) {
3791 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3793 Rep = Builder.CreateBitCast(Rep, FTy);
3796 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3797 Name.starts_with(
"avx512.mask.pandn.")) {
3800 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3801 Rep = Builder.CreateAnd(Rep,
3803 Rep = Builder.CreateBitCast(Rep, FTy);
3806 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3807 Name.starts_with(
"avx512.mask.por.")) {
3810 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3812 Rep = Builder.CreateBitCast(Rep, FTy);
3815 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3816 Name.starts_with(
"avx512.mask.pxor.")) {
3819 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3821 Rep = Builder.CreateBitCast(Rep, FTy);
3824 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3828 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3832 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3836 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3837 if (Name.ends_with(
".512")) {
3839 if (Name[17] ==
's')
3840 IID = Intrinsic::x86_avx512_add_ps_512;
3842 IID = Intrinsic::x86_avx512_add_pd_512;
3844 Rep = Builder.CreateIntrinsic(
3852 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3853 if (Name.ends_with(
".512")) {
3855 if (Name[17] ==
's')
3856 IID = Intrinsic::x86_avx512_div_ps_512;
3858 IID = Intrinsic::x86_avx512_div_pd_512;
3860 Rep = Builder.CreateIntrinsic(
3868 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3869 if (Name.ends_with(
".512")) {
3871 if (Name[17] ==
's')
3872 IID = Intrinsic::x86_avx512_mul_ps_512;
3874 IID = Intrinsic::x86_avx512_mul_pd_512;
3876 Rep = Builder.CreateIntrinsic(
3884 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3885 if (Name.ends_with(
".512")) {
3887 if (Name[17] ==
's')
3888 IID = Intrinsic::x86_avx512_sub_ps_512;
3890 IID = Intrinsic::x86_avx512_sub_pd_512;
3892 Rep = Builder.CreateIntrinsic(
3900 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3901 Name.starts_with(
"avx512.mask.min.p")) &&
3902 Name.drop_front(18) ==
".512") {
3903 bool IsDouble = Name[17] ==
'd';
3904 bool IsMin = Name[13] ==
'i';
3906 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3907 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3910 Rep = Builder.CreateIntrinsic(
3915 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
3917 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
3918 {CI->getArgOperand(0), Builder.getInt1(false)});
3921 }
else if (Name.starts_with(
"avx512.mask.psll")) {
3922 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3923 bool IsVariable = Name[16] ==
'v';
3924 char Size = Name[16] ==
'.' ? Name[17]
3925 : Name[17] ==
'.' ? Name[18]
3926 : Name[18] ==
'.' ? Name[19]
3930 if (IsVariable && Name[17] !=
'.') {
3931 if (
Size ==
'd' && Name[17] ==
'2')
3932 IID = Intrinsic::x86_avx2_psllv_q;
3933 else if (
Size ==
'd' && Name[17] ==
'4')
3934 IID = Intrinsic::x86_avx2_psllv_q_256;
3935 else if (
Size ==
's' && Name[17] ==
'4')
3936 IID = Intrinsic::x86_avx2_psllv_d;
3937 else if (
Size ==
's' && Name[17] ==
'8')
3938 IID = Intrinsic::x86_avx2_psllv_d_256;
3939 else if (
Size ==
'h' && Name[17] ==
'8')
3940 IID = Intrinsic::x86_avx512_psllv_w_128;
3941 else if (
Size ==
'h' && Name[17] ==
'1')
3942 IID = Intrinsic::x86_avx512_psllv_w_256;
3943 else if (Name[17] ==
'3' && Name[18] ==
'2')
3944 IID = Intrinsic::x86_avx512_psllv_w_512;
3947 }
else if (Name.ends_with(
".128")) {
3949 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3950 : Intrinsic::x86_sse2_psll_d;
3951 else if (
Size ==
'q')
3952 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3953 : Intrinsic::x86_sse2_psll_q;
3954 else if (
Size ==
'w')
3955 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3956 : Intrinsic::x86_sse2_psll_w;
3959 }
else if (Name.ends_with(
".256")) {
3961 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3962 : Intrinsic::x86_avx2_psll_d;
3963 else if (
Size ==
'q')
3964 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3965 : Intrinsic::x86_avx2_psll_q;
3966 else if (
Size ==
'w')
3967 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3968 : Intrinsic::x86_avx2_psll_w;
3973 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3974 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3975 : Intrinsic::x86_avx512_psll_d_512;
3976 else if (
Size ==
'q')
3977 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3978 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3979 : Intrinsic::x86_avx512_psll_q_512;
3980 else if (
Size ==
'w')
3981 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3982 : Intrinsic::x86_avx512_psll_w_512;
3988 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
3989 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3990 bool IsVariable = Name[16] ==
'v';
3991 char Size = Name[16] ==
'.' ? Name[17]
3992 : Name[17] ==
'.' ? Name[18]
3993 : Name[18] ==
'.' ? Name[19]
3997 if (IsVariable && Name[17] !=
'.') {
3998 if (
Size ==
'd' && Name[17] ==
'2')
3999 IID = Intrinsic::x86_avx2_psrlv_q;
4000 else if (
Size ==
'd' && Name[17] ==
'4')
4001 IID = Intrinsic::x86_avx2_psrlv_q_256;
4002 else if (
Size ==
's' && Name[17] ==
'4')
4003 IID = Intrinsic::x86_avx2_psrlv_d;
4004 else if (
Size ==
's' && Name[17] ==
'8')
4005 IID = Intrinsic::x86_avx2_psrlv_d_256;
4006 else if (
Size ==
'h' && Name[17] ==
'8')
4007 IID = Intrinsic::x86_avx512_psrlv_w_128;
4008 else if (
Size ==
'h' && Name[17] ==
'1')
4009 IID = Intrinsic::x86_avx512_psrlv_w_256;
4010 else if (Name[17] ==
'3' && Name[18] ==
'2')
4011 IID = Intrinsic::x86_avx512_psrlv_w_512;
4014 }
else if (Name.ends_with(
".128")) {
4016 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4017 : Intrinsic::x86_sse2_psrl_d;
4018 else if (
Size ==
'q')
4019 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4020 : Intrinsic::x86_sse2_psrl_q;
4021 else if (
Size ==
'w')
4022 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4023 : Intrinsic::x86_sse2_psrl_w;
4026 }
else if (Name.ends_with(
".256")) {
4028 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4029 : Intrinsic::x86_avx2_psrl_d;
4030 else if (
Size ==
'q')
4031 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4032 : Intrinsic::x86_avx2_psrl_q;
4033 else if (
Size ==
'w')
4034 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4035 : Intrinsic::x86_avx2_psrl_w;
4040 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4041 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4042 : Intrinsic::x86_avx512_psrl_d_512;
4043 else if (
Size ==
'q')
4044 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4045 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4046 : Intrinsic::x86_avx512_psrl_q_512;
4047 else if (
Size ==
'w')
4048 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4049 : Intrinsic::x86_avx512_psrl_w_512;
4055 }
else if (Name.starts_with(
"avx512.mask.psra")) {
4056 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4057 bool IsVariable = Name[16] ==
'v';
4058 char Size = Name[16] ==
'.' ? Name[17]
4059 : Name[17] ==
'.' ? Name[18]
4060 : Name[18] ==
'.' ? Name[19]
4064 if (IsVariable && Name[17] !=
'.') {
4065 if (
Size ==
's' && Name[17] ==
'4')
4066 IID = Intrinsic::x86_avx2_psrav_d;
4067 else if (
Size ==
's' && Name[17] ==
'8')
4068 IID = Intrinsic::x86_avx2_psrav_d_256;
4069 else if (
Size ==
'h' && Name[17] ==
'8')
4070 IID = Intrinsic::x86_avx512_psrav_w_128;
4071 else if (
Size ==
'h' && Name[17] ==
'1')
4072 IID = Intrinsic::x86_avx512_psrav_w_256;
4073 else if (Name[17] ==
'3' && Name[18] ==
'2')
4074 IID = Intrinsic::x86_avx512_psrav_w_512;
4077 }
else if (Name.ends_with(
".128")) {
4079 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4080 : Intrinsic::x86_sse2_psra_d;
4081 else if (
Size ==
'q')
4082 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4083 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4084 : Intrinsic::x86_avx512_psra_q_128;
4085 else if (
Size ==
'w')
4086 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4087 : Intrinsic::x86_sse2_psra_w;
4090 }
else if (Name.ends_with(
".256")) {
4092 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4093 : Intrinsic::x86_avx2_psra_d;
4094 else if (
Size ==
'q')
4095 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4096 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4097 : Intrinsic::x86_avx512_psra_q_256;
4098 else if (
Size ==
'w')
4099 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4100 : Intrinsic::x86_avx2_psra_w;
4105 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4106 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4107 : Intrinsic::x86_avx512_psra_d_512;
4108 else if (
Size ==
'q')
4109 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4110 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4111 : Intrinsic::x86_avx512_psra_q_512;
4112 else if (
Size ==
'w')
4113 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4114 : Intrinsic::x86_avx512_psra_w_512;
4120 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
4122 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
4124 }
else if (Name.ends_with(
".movntdqa")) {
4128 LoadInst *LI = Builder.CreateAlignedLoad(
4133 }
else if (Name.starts_with(
"fma.vfmadd.") ||
4134 Name.starts_with(
"fma.vfmsub.") ||
4135 Name.starts_with(
"fma.vfnmadd.") ||
4136 Name.starts_with(
"fma.vfnmsub.")) {
4137 bool NegMul = Name[6] ==
'n';
4138 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
4139 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
4150 if (NegMul && !IsScalar)
4151 Ops[0] = Builder.CreateFNeg(
Ops[0]);
4152 if (NegMul && IsScalar)
4153 Ops[1] = Builder.CreateFNeg(
Ops[1]);
4155 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4157 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4161 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
4169 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4173 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
4174 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
4175 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
4176 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
4177 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
4178 bool IsMask3 = Name[11] ==
'3';
4179 bool IsMaskZ = Name[11] ==
'z';
4181 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4182 bool NegMul = Name[2] ==
'n';
4183 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4189 if (NegMul && (IsMask3 || IsMaskZ))
4190 A = Builder.CreateFNeg(
A);
4191 if (NegMul && !(IsMask3 || IsMaskZ))
4192 B = Builder.CreateFNeg(
B);
4194 C = Builder.CreateFNeg(
C);
4196 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
4197 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
4198 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
4205 if (Name.back() ==
'd')
4206 IID = Intrinsic::x86_avx512_vfmadd_f64;
4208 IID = Intrinsic::x86_avx512_vfmadd_f32;
4209 Rep = Builder.CreateIntrinsic(IID,
Ops);
4211 Rep = Builder.CreateFMA(
A,
B,
C);
4220 if (NegAcc && IsMask3)
4225 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
4227 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
4228 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
4229 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
4230 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
4231 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
4232 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
4233 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
4234 bool IsMask3 = Name[11] ==
'3';
4235 bool IsMaskZ = Name[11] ==
'z';
4237 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4238 bool NegMul = Name[2] ==
'n';
4239 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4245 if (NegMul && (IsMask3 || IsMaskZ))
4246 A = Builder.CreateFNeg(
A);
4247 if (NegMul && !(IsMask3 || IsMaskZ))
4248 B = Builder.CreateFNeg(
B);
4250 C = Builder.CreateFNeg(
C);
4257 if (Name[Name.size() - 5] ==
's')
4258 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4260 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4264 Rep = Builder.CreateFMA(
A,
B,
C);
4272 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4276 if (VecWidth == 128 && EltWidth == 32)
4277 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4278 else if (VecWidth == 256 && EltWidth == 32)
4279 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4280 else if (VecWidth == 128 && EltWidth == 64)
4281 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4282 else if (VecWidth == 256 && EltWidth == 64)
4283 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4289 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4290 Rep = Builder.CreateIntrinsic(IID,
Ops);
4291 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4292 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4293 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4294 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4295 bool IsMask3 = Name[11] ==
'3';
4296 bool IsMaskZ = Name[11] ==
'z';
4298 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4299 bool IsSubAdd = Name[3] ==
's';
4303 if (Name[Name.size() - 5] ==
's')
4304 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4306 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4311 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4313 Rep = Builder.CreateIntrinsic(IID,
Ops);
4322 Value *Odd = Builder.CreateCall(FMA,
Ops);
4323 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4324 Value *Even = Builder.CreateCall(FMA,
Ops);
4330 for (
int i = 0; i != NumElts; ++i)
4331 Idxs[i] = i + (i % 2) * NumElts;
4333 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4341 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4342 Name.starts_with(
"avx512.maskz.pternlog.")) {
4343 bool ZeroMask = Name[11] ==
'z';
4347 if (VecWidth == 128 && EltWidth == 32)
4348 IID = Intrinsic::x86_avx512_pternlog_d_128;
4349 else if (VecWidth == 256 && EltWidth == 32)
4350 IID = Intrinsic::x86_avx512_pternlog_d_256;
4351 else if (VecWidth == 512 && EltWidth == 32)
4352 IID = Intrinsic::x86_avx512_pternlog_d_512;
4353 else if (VecWidth == 128 && EltWidth == 64)
4354 IID = Intrinsic::x86_avx512_pternlog_q_128;
4355 else if (VecWidth == 256 && EltWidth == 64)
4356 IID = Intrinsic::x86_avx512_pternlog_q_256;
4357 else if (VecWidth == 512 && EltWidth == 64)
4358 IID = Intrinsic::x86_avx512_pternlog_q_512;
4364 Rep = Builder.CreateIntrinsic(IID, Args);
4368 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4369 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4370 bool ZeroMask = Name[11] ==
'z';
4371 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4374 if (VecWidth == 128 && !
High)
4375 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4376 else if (VecWidth == 256 && !
High)
4377 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4378 else if (VecWidth == 512 && !
High)
4379 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4380 else if (VecWidth == 128 &&
High)
4381 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4382 else if (VecWidth == 256 &&
High)
4383 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4384 else if (VecWidth == 512 &&
High)
4385 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4391 Rep = Builder.CreateIntrinsic(IID, Args);
4395 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4396 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4397 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4398 bool ZeroMask = Name[11] ==
'z';
4399 bool IndexForm = Name[17] ==
'i';
4401 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4402 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4403 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4404 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4405 bool ZeroMask = Name[11] ==
'z';
4406 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4409 if (VecWidth == 128 && !IsSaturating)
4410 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4411 else if (VecWidth == 256 && !IsSaturating)
4412 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4413 else if (VecWidth == 512 && !IsSaturating)
4414 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4415 else if (VecWidth == 128 && IsSaturating)
4416 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4417 else if (VecWidth == 256 && IsSaturating)
4418 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4419 else if (VecWidth == 512 && IsSaturating)
4420 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4430 if (Args[1]->
getType()->isVectorTy() &&
4433 ->isIntegerTy(32) &&
4434 Args[2]->
getType()->isVectorTy() &&
4437 ->isIntegerTy(32)) {
4438 Type *NewArgType =
nullptr;
4439 if (VecWidth == 128)
4441 else if (VecWidth == 256)
4443 else if (VecWidth == 512)
4449 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4450 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4453 Rep = Builder.CreateIntrinsic(IID, Args);
4457 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4458 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4459 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4460 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4461 bool ZeroMask = Name[11] ==
'z';
4462 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4465 if (VecWidth == 128 && !IsSaturating)
4466 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4467 else if (VecWidth == 256 && !IsSaturating)
4468 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4469 else if (VecWidth == 512 && !IsSaturating)
4470 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4471 else if (VecWidth == 128 && IsSaturating)
4472 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4473 else if (VecWidth == 256 && IsSaturating)
4474 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4475 else if (VecWidth == 512 && IsSaturating)
4476 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4486 if (Args[1]->
getType()->isVectorTy() &&
4489 ->isIntegerTy(32) &&
4490 Args[2]->
getType()->isVectorTy() &&
4493 ->isIntegerTy(32)) {
4494 Type *NewArgType =
nullptr;
4495 if (VecWidth == 128)
4497 else if (VecWidth == 256)
4499 else if (VecWidth == 512)
4505 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4506 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4509 Rep = Builder.CreateIntrinsic(IID, Args);
4513 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4514 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4515 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4517 if (Name[0] ==
'a' && Name.back() ==
'2')
4518 IID = Intrinsic::x86_addcarry_32;
4519 else if (Name[0] ==
'a' && Name.back() ==
'4')
4520 IID = Intrinsic::x86_addcarry_64;
4521 else if (Name[0] ==
's' && Name.back() ==
'2')
4522 IID = Intrinsic::x86_subborrow_32;
4523 else if (Name[0] ==
's' && Name.back() ==
'4')
4524 IID = Intrinsic::x86_subborrow_64;
4531 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4534 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4537 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4541 }
else if (Name.starts_with(
"avx512.mask.") &&
4552 if (Name.starts_with(
"neon.bfcvt")) {
4553 if (Name.starts_with(
"neon.bfcvtn2")) {
4555 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4557 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4558 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4561 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4562 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4564 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4568 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4569 return Builder.CreateShuffleVector(
4572 return Builder.CreateFPTrunc(CI->
getOperand(0),
4575 }
else if (Name.starts_with(
"sve.fcvt")) {
4578 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4579 .
Case(
"sve.fcvtnt.bf16f32",
4580 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4592 if (Args[1]->
getType() != BadPredTy)
4595 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4596 BadPredTy, Args[1]);
4597 Args[1] = Builder.CreateIntrinsic(
4598 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4600 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4609 if (Name ==
"mve.vctp64.old") {
4612 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4615 Value *C1 = Builder.CreateIntrinsic(
4616 Intrinsic::arm_mve_pred_v2i,
4618 return Builder.CreateIntrinsic(
4619 Intrinsic::arm_mve_pred_i2v,
4621 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4622 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4623 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4624 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4626 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4627 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4628 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4629 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4631 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4632 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4633 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4634 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4635 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4636 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4637 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4638 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4639 std::vector<Type *> Tys;
4643 case Intrinsic::arm_mve_mull_int_predicated:
4644 case Intrinsic::arm_mve_vqdmull_predicated:
4645 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4648 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4649 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4650 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4654 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4658 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4662 case Intrinsic::arm_cde_vcx1q_predicated:
4663 case Intrinsic::arm_cde_vcx1qa_predicated:
4664 case Intrinsic::arm_cde_vcx2q_predicated:
4665 case Intrinsic::arm_cde_vcx2qa_predicated:
4666 case Intrinsic::arm_cde_vcx3q_predicated:
4667 case Intrinsic::arm_cde_vcx3qa_predicated:
4674 std::vector<Value *>
Ops;
4676 Type *Ty =
Op->getType();
4677 if (Ty->getScalarSizeInBits() == 1) {
4678 Value *C1 = Builder.CreateIntrinsic(
4679 Intrinsic::arm_mve_pred_v2i,
4681 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4686 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4701 auto UpgradeLegacyWMMAIUIntrinsicCall =
4706 Args.push_back(Builder.getFalse());
4710 F->getParent(),
F->getIntrinsicID(), OverloadTys);
4717 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4722 NewCall->copyMetadata(*CI);
4726 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4727 assert(CI->
arg_size() == 7 &&
"Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4728 "intrinsic should have 7 arguments");
4731 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2});
4733 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4734 assert(CI->
arg_size() == 8 &&
"Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4735 "intrinsic should have 8 arguments");
4740 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2, T3, T4});
4743 switch (
F->getIntrinsicID()) {
4746 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4747 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4748 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4749 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4750 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4751 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4766 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4769 F->getParent(),
F->getIntrinsicID(), Overloads);
4774 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4779 NewCall->copyMetadata(*CI);
4780 NewCall->takeName(CI);
4802 if (NumOperands < 3)
4815 bool IsVolatile =
false;
4819 if (NumOperands > 3)
4824 if (NumOperands > 5) {
4826 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4840 if (VT->getElementType()->isIntegerTy(16)) {
4843 Val = Builder.CreateBitCast(Val, AsBF16);
4851 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4853 unsigned AddrSpace = PtrTy->getAddressSpace();
4856 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4858 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4863 MDNode *RangeNotPrivate =
4866 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4872 return Builder.CreateBitCast(RMW, RetTy);
4893 return MAV->getMetadata();
4902 if (Name ==
"label") {
4904 }
else if (Name ==
"assign") {
4911 }
else if (Name ==
"declare") {
4915 }
else if (Name ==
"addr") {
4925 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr);
4926 }
else if (Name ==
"value") {
4929 unsigned ExprOp = 2;
4944 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
4952 int64_t OffsetVal =
Offset->getSExtValue();
4953 return Builder.CreateIntrinsic(OffsetVal >= 0
4954 ? Intrinsic::vector_splice_left
4955 : Intrinsic::vector_splice_right,
4957 {CI->getArgOperand(0), CI->getArgOperand(1),
4958 Builder.getInt32(std::abs(OffsetVal))});
4963 if (Name.starts_with(
"to.fp16")) {
4965 Builder.CreateFPTrunc(CI->
getArgOperand(0), Builder.getHalfTy());
4966 return Builder.CreateBitCast(Cast, CI->
getType());
4969 if (Name.starts_with(
"from.fp16")) {
4971 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
4972 return Builder.CreateFPExt(Cast, CI->
getType());
4997 if (!Name.consume_front(
"llvm."))
5000 bool IsX86 = Name.consume_front(
"x86.");
5001 bool IsNVVM = Name.consume_front(
"nvvm.");
5002 bool IsAArch64 = Name.consume_front(
"aarch64.");
5003 bool IsARM = Name.consume_front(
"arm.");
5004 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
5005 bool IsDbg = Name.consume_front(
"dbg.");
5007 (Name.consume_front(
"experimental.vector.splice") ||
5008 Name.consume_front(
"vector.splice")) &&
5009 !(Name.starts_with(
".left") || Name.starts_with(
".right"));
5010 Value *Rep =
nullptr;
5012 if (!IsX86 && Name ==
"stackprotectorcheck") {
5014 }
else if (IsNVVM) {
5018 }
else if (IsAArch64) {
5022 }
else if (IsAMDGCN) {
5026 }
else if (IsOldSplice) {
5028 }
else if (Name.consume_front(
"convert.")) {
5040 const auto &DefaultCase = [&]() ->
void {
5048 "Unknown function for CallBase upgrade and isn't just a name change");
5056 "Return type must have changed");
5057 assert(OldST->getNumElements() ==
5059 "Must have same number of elements");
5062 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5065 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5066 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5067 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5086 case Intrinsic::arm_neon_vst1:
5087 case Intrinsic::arm_neon_vst2:
5088 case Intrinsic::arm_neon_vst3:
5089 case Intrinsic::arm_neon_vst4:
5090 case Intrinsic::arm_neon_vst2lane:
5091 case Intrinsic::arm_neon_vst3lane:
5092 case Intrinsic::arm_neon_vst4lane: {
5094 NewCall = Builder.CreateCall(NewFn, Args);
5097 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5098 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5099 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5104 NewCall = Builder.CreateCall(NewFn, Args);
5107 case Intrinsic::aarch64_sve_ld3_sret:
5108 case Intrinsic::aarch64_sve_ld4_sret:
5109 case Intrinsic::aarch64_sve_ld2_sret: {
5117 Name = Name.substr(5);
5124 unsigned MinElts = RetTy->getMinNumElements() /
N;
5126 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5128 for (
unsigned I = 0;
I <
N;
I++) {
5129 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
5130 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
5136 case Intrinsic::coro_end: {
5139 NewCall = Builder.CreateCall(NewFn, Args);
5143 case Intrinsic::vector_extract: {
5145 Name = Name.substr(5);
5146 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
5151 unsigned MinElts = RetTy->getMinNumElements();
5154 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
5158 case Intrinsic::vector_insert: {
5160 Name = Name.substr(5);
5161 if (!Name.starts_with(
"aarch64.sve.tuple")) {
5165 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
5170 NewCall = Builder.CreateCall(
5174 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
5180 assert(
N > 1 &&
"Create is expected to be between 2-4");
5183 unsigned MinElts = RetTy->getMinNumElements() /
N;
5184 for (
unsigned I = 0;
I <
N;
I++) {
5186 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
5193 case Intrinsic::arm_neon_bfdot:
5194 case Intrinsic::arm_neon_bfmmla:
5195 case Intrinsic::arm_neon_bfmlalb:
5196 case Intrinsic::arm_neon_bfmlalt:
5197 case Intrinsic::aarch64_neon_bfdot:
5198 case Intrinsic::aarch64_neon_bfmmla:
5199 case Intrinsic::aarch64_neon_bfmlalb:
5200 case Intrinsic::aarch64_neon_bfmlalt: {
5203 "Mismatch between function args and call args");
5204 size_t OperandWidth =
5206 assert((OperandWidth == 64 || OperandWidth == 128) &&
5207 "Unexpected operand width");
5209 auto Iter = CI->
args().begin();
5210 Args.push_back(*Iter++);
5211 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5212 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5213 NewCall = Builder.CreateCall(NewFn, Args);
5217 case Intrinsic::bitreverse:
5218 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5221 case Intrinsic::ctlz:
5222 case Intrinsic::cttz: {
5229 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
5233 case Intrinsic::objectsize: {
5234 Value *NullIsUnknownSize =
5238 NewCall = Builder.CreateCall(
5243 case Intrinsic::ctpop:
5244 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5246 case Intrinsic::dbg_value: {
5248 Name = Name.substr(5);
5250 if (Name.starts_with(
"dbg.addr")) {
5264 if (
Offset->isNullValue()) {
5265 NewCall = Builder.CreateCall(
5274 case Intrinsic::ptr_annotation:
5282 NewCall = Builder.CreateCall(
5291 case Intrinsic::var_annotation:
5298 NewCall = Builder.CreateCall(
5307 case Intrinsic::riscv_aes32dsi:
5308 case Intrinsic::riscv_aes32dsmi:
5309 case Intrinsic::riscv_aes32esi:
5310 case Intrinsic::riscv_aes32esmi:
5311 case Intrinsic::riscv_sm4ks:
5312 case Intrinsic::riscv_sm4ed: {
5322 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5323 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5329 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5330 Value *Res = NewCall;
5332 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5338 case Intrinsic::nvvm_mapa_shared_cluster: {
5342 Value *Res = NewCall;
5343 Res = Builder.CreateAddrSpaceCast(
5350 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5351 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5354 Args[0] = Builder.CreateAddrSpaceCast(
5357 NewCall = Builder.CreateCall(NewFn, Args);
5363 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5364 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5365 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5366 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5367 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5368 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5369 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5370 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5377 Args[0] = Builder.CreateAddrSpaceCast(
5386 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5388 NewCall = Builder.CreateCall(NewFn, Args);
5394 case Intrinsic::riscv_sha256sig0:
5395 case Intrinsic::riscv_sha256sig1:
5396 case Intrinsic::riscv_sha256sum0:
5397 case Intrinsic::riscv_sha256sum1:
5398 case Intrinsic::riscv_sm3p0:
5399 case Intrinsic::riscv_sm3p1: {
5406 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5408 NewCall = Builder.CreateCall(NewFn, Arg);
5410 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5417 case Intrinsic::x86_xop_vfrcz_ss:
5418 case Intrinsic::x86_xop_vfrcz_sd:
5419 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5422 case Intrinsic::x86_xop_vpermil2pd:
5423 case Intrinsic::x86_xop_vpermil2ps:
5424 case Intrinsic::x86_xop_vpermil2pd_256:
5425 case Intrinsic::x86_xop_vpermil2ps_256: {
5429 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5430 NewCall = Builder.CreateCall(NewFn, Args);
5434 case Intrinsic::x86_sse41_ptestc:
5435 case Intrinsic::x86_sse41_ptestz:
5436 case Intrinsic::x86_sse41_ptestnzc: {
5450 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5451 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5453 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5457 case Intrinsic::x86_rdtscp: {
5463 NewCall = Builder.CreateCall(NewFn);
5465 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5468 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5476 case Intrinsic::x86_sse41_insertps:
5477 case Intrinsic::x86_sse41_dppd:
5478 case Intrinsic::x86_sse41_dpps:
5479 case Intrinsic::x86_sse41_mpsadbw:
5480 case Intrinsic::x86_avx_dp_ps_256:
5481 case Intrinsic::x86_avx2_mpsadbw: {
5487 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5488 NewCall = Builder.CreateCall(NewFn, Args);
5492 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5493 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5494 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5495 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5496 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5497 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5503 NewCall = Builder.CreateCall(NewFn, Args);
5512 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5513 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5514 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5515 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5516 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5517 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5521 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5522 Args[1] = Builder.CreateBitCast(
5525 NewCall = Builder.CreateCall(NewFn, Args);
5526 Value *Res = Builder.CreateBitCast(
5534 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5535 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5536 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5540 Args[1] = Builder.CreateBitCast(
5542 Args[2] = Builder.CreateBitCast(
5545 NewCall = Builder.CreateCall(NewFn, Args);
5549 case Intrinsic::thread_pointer: {
5550 NewCall = Builder.CreateCall(NewFn, {});
5554 case Intrinsic::memcpy:
5555 case Intrinsic::memmove:
5556 case Intrinsic::memset: {
5572 NewCall = Builder.CreateCall(NewFn, Args);
5574 AttributeList NewAttrs = AttributeList::get(
5575 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5576 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5577 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5582 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5585 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5589 case Intrinsic::masked_load:
5590 case Intrinsic::masked_gather:
5591 case Intrinsic::masked_store:
5592 case Intrinsic::masked_scatter: {
5598 auto GetMaybeAlign = [](
Value *
Op) {
5608 auto GetAlign = [&](
Value *
Op) {
5617 case Intrinsic::masked_load:
5618 NewCall = Builder.CreateMaskedLoad(
5622 case Intrinsic::masked_gather:
5623 NewCall = Builder.CreateMaskedGather(
5629 case Intrinsic::masked_store:
5630 NewCall = Builder.CreateMaskedStore(
5634 case Intrinsic::masked_scatter:
5635 NewCall = Builder.CreateMaskedScatter(
5637 DL.getValueOrABITypeAlignment(
5651 case Intrinsic::lifetime_start:
5652 case Intrinsic::lifetime_end: {
5664 NewCall = Builder.CreateLifetimeStart(Ptr);
5666 NewCall = Builder.CreateLifetimeEnd(Ptr);
5675 case Intrinsic::x86_avx512_vpdpbusd_128:
5676 case Intrinsic::x86_avx512_vpdpbusd_256:
5677 case Intrinsic::x86_avx512_vpdpbusd_512:
5678 case Intrinsic::x86_avx512_vpdpbusds_128:
5679 case Intrinsic::x86_avx512_vpdpbusds_256:
5680 case Intrinsic::x86_avx512_vpdpbusds_512:
5681 case Intrinsic::x86_avx2_vpdpbssd_128:
5682 case Intrinsic::x86_avx2_vpdpbssd_256:
5683 case Intrinsic::x86_avx10_vpdpbssd_512:
5684 case Intrinsic::x86_avx2_vpdpbssds_128:
5685 case Intrinsic::x86_avx2_vpdpbssds_256:
5686 case Intrinsic::x86_avx10_vpdpbssds_512:
5687 case Intrinsic::x86_avx2_vpdpbsud_128:
5688 case Intrinsic::x86_avx2_vpdpbsud_256:
5689 case Intrinsic::x86_avx10_vpdpbsud_512:
5690 case Intrinsic::x86_avx2_vpdpbsuds_128:
5691 case Intrinsic::x86_avx2_vpdpbsuds_256:
5692 case Intrinsic::x86_avx10_vpdpbsuds_512:
5693 case Intrinsic::x86_avx2_vpdpbuud_128:
5694 case Intrinsic::x86_avx2_vpdpbuud_256:
5695 case Intrinsic::x86_avx10_vpdpbuud_512:
5696 case Intrinsic::x86_avx2_vpdpbuuds_128:
5697 case Intrinsic::x86_avx2_vpdpbuuds_256:
5698 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5703 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5704 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5706 NewCall = Builder.CreateCall(NewFn, Args);
5709 case Intrinsic::x86_avx512_vpdpwssd_128:
5710 case Intrinsic::x86_avx512_vpdpwssd_256:
5711 case Intrinsic::x86_avx512_vpdpwssd_512:
5712 case Intrinsic::x86_avx512_vpdpwssds_128:
5713 case Intrinsic::x86_avx512_vpdpwssds_256:
5714 case Intrinsic::x86_avx512_vpdpwssds_512:
5715 case Intrinsic::x86_avx2_vpdpwsud_128:
5716 case Intrinsic::x86_avx2_vpdpwsud_256:
5717 case Intrinsic::x86_avx10_vpdpwsud_512:
5718 case Intrinsic::x86_avx2_vpdpwsuds_128:
5719 case Intrinsic::x86_avx2_vpdpwsuds_256:
5720 case Intrinsic::x86_avx10_vpdpwsuds_512:
5721 case Intrinsic::x86_avx2_vpdpwusd_128:
5722 case Intrinsic::x86_avx2_vpdpwusd_256:
5723 case Intrinsic::x86_avx10_vpdpwusd_512:
5724 case Intrinsic::x86_avx2_vpdpwusds_128:
5725 case Intrinsic::x86_avx2_vpdpwusds_256:
5726 case Intrinsic::x86_avx10_vpdpwusds_512:
5727 case Intrinsic::x86_avx2_vpdpwuud_128:
5728 case Intrinsic::x86_avx2_vpdpwuud_256:
5729 case Intrinsic::x86_avx10_vpdpwuud_512:
5730 case Intrinsic::x86_avx2_vpdpwuuds_128:
5731 case Intrinsic::x86_avx2_vpdpwuuds_256:
5732 case Intrinsic::x86_avx10_vpdpwuuds_512:
5737 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5738 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5740 NewCall = Builder.CreateCall(NewFn, Args);
5743 assert(NewCall &&
"Should have either set this variable or returned through "
5744 "the default case");
5751 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5765 F->eraseFromParent();
5771 if (NumOperands == 0)
5779 if (NumOperands == 3) {
5783 Metadata *Elts2[] = {ScalarType, ScalarType,
5797 if (
Opc != Instruction::BitCast)
5801 Type *SrcTy = V->getType();
5818 if (
Opc != Instruction::BitCast)
5821 Type *SrcTy =
C->getType();
5848 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5849 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5850 if (Flag->getNumOperands() < 3)
5852 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5853 return K->getString() ==
"Debug Info Version";
5856 if (OpIt != ModFlags->op_end()) {
5857 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5864 bool BrokenDebugInfo =
false;
5867 if (!BrokenDebugInfo)
5873 M.getContext().diagnose(Diag);
5880 M.getContext().diagnose(DiagVersion);
5890 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5893 if (
F->hasFnAttribute(Attr)) {
5896 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5898 auto [Part, Rest] = S.
split(
',');
5904 const unsigned Dim = DimC -
'x';
5905 assert(Dim < 3 &&
"Unexpected dim char");
5915 F->addFnAttr(Attr, NewAttr);
5919 return S ==
"x" || S ==
"y" || S ==
"z";
5924 if (K ==
"kernel") {
5936 const unsigned Idx = (AlignIdxValuePair >> 16);
5937 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5942 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5947 if (K ==
"minctasm") {
5952 if (K ==
"maxnreg") {
5957 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
5961 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
5965 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
5969 if (K ==
"grid_constant") {
5984 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
5991 if (!SeenNodes.
insert(MD).second)
5998 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
6005 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6007 const MDOperand &V = MD->getOperand(j + 1);
6010 NewOperands.
append({K, V});
6013 if (NewOperands.
size() > 1)
6026 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
6027 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6028 if (ModRetainReleaseMarker) {
6034 ID->getString().split(ValueComp,
"#");
6035 if (ValueComp.
size() == 2) {
6036 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
6040 M.eraseNamedMetadata(ModRetainReleaseMarker);
6051 auto UpgradeToIntrinsic = [&](
const char *OldFunc,
6077 bool InvalidCast =
false;
6079 for (
unsigned I = 0, E = CI->
arg_size();
I != E; ++
I) {
6092 Arg = Builder.CreateBitCast(Arg, NewFuncTy->
getParamType(
I));
6094 Args.push_back(Arg);
6101 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6106 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->
getType());
6119 UpgradeToIntrinsic(
"clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6127 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6128 {
"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6129 {
"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6130 {
"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6131 {
"objc_autoreleaseReturnValue",
6132 llvm::Intrinsic::objc_autoreleaseReturnValue},
6133 {
"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6134 {
"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6135 {
"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6136 {
"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6137 {
"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6138 {
"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6139 {
"objc_release", llvm::Intrinsic::objc_release},
6140 {
"objc_retain", llvm::Intrinsic::objc_retain},
6141 {
"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6142 {
"objc_retainAutoreleaseReturnValue",
6143 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6144 {
"objc_retainAutoreleasedReturnValue",
6145 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6146 {
"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6147 {
"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6148 {
"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6149 {
"objc_unsafeClaimAutoreleasedReturnValue",
6150 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6151 {
"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6152 {
"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6153 {
"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6154 {
"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6155 {
"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6156 {
"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6157 {
"objc_arc_annotation_topdown_bbstart",
6158 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6159 {
"objc_arc_annotation_topdown_bbend",
6160 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6161 {
"objc_arc_annotation_bottomup_bbstart",
6162 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6163 {
"objc_arc_annotation_bottomup_bbend",
6164 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6166 for (
auto &
I : RuntimeFuncs)
6167 UpgradeToIntrinsic(
I.first,
I.second);
6171 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6175 bool HasObjCFlag =
false, HasClassProperties =
false,
Changed =
false;
6176 bool HasSwiftVersionFlag =
false;
6177 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6184 if (
Op->getNumOperands() != 3)
6198 if (
ID->getString() ==
"Objective-C Image Info Version")
6200 if (
ID->getString() ==
"Objective-C Class Properties")
6201 HasClassProperties =
true;
6203 if (
ID->getString() ==
"PIC Level") {
6204 if (
auto *Behavior =
6206 uint64_t V = Behavior->getLimitedValue();
6212 if (
ID->getString() ==
"PIE Level")
6213 if (
auto *Behavior =
6220 if (
ID->getString() ==
"branch-target-enforcement" ||
6221 ID->getString().starts_with(
"sign-return-address")) {
6222 if (
auto *Behavior =
6228 Op->getOperand(1),
Op->getOperand(2)};
6238 if (
ID->getString() ==
"Objective-C Image Info Section") {
6241 Value->getString().split(ValueComp,
" ");
6242 if (ValueComp.
size() != 1) {
6243 std::string NewValue;
6244 for (
auto &S : ValueComp)
6245 NewValue += S.str();
6256 if (
ID->getString() ==
"Objective-C Garbage Collection") {
6259 assert(Md->getValue() &&
"Expected non-empty metadata");
6260 auto Type = Md->getValue()->getType();
6263 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6264 if ((Val & 0xff) != Val) {
6265 HasSwiftVersionFlag =
true;
6266 SwiftABIVersion = (Val & 0xff00) >> 8;
6267 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6268 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6279 if (
ID->getString() ==
"amdgpu_code_object_version") {
6282 MDString::get(M.getContext(),
"amdhsa_code_object_version"),
6294 if (HasObjCFlag && !HasClassProperties) {
6300 if (HasSwiftVersionFlag) {
6304 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6306 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6314 auto TrimSpaces = [](
StringRef Section) -> std::string {
6316 Section.split(Components,
',');
6321 for (
auto Component : Components)
6322 OS <<
',' << Component.trim();
6327 for (
auto &GV : M.globals()) {
6328 if (!GV.hasSection())
6333 if (!Section.starts_with(
"__DATA, __objc_catlist"))
6338 GV.setSection(TrimSpaces(Section));
6354struct StrictFPUpgradeVisitor :
public InstVisitor<StrictFPUpgradeVisitor> {
6355 StrictFPUpgradeVisitor() =
default;
6358 if (!
Call.isStrictFP())
6364 Call.removeFnAttr(Attribute::StrictFP);
6365 Call.addFnAttr(Attribute::NoBuiltin);
6370struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6371 :
public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6372 AMDGPUUnsafeFPAtomicsUpgradeVisitor() =
default;
6374 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6389 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
6390 StrictFPUpgradeVisitor SFPV;
6395 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6396 F.getReturnType(),
F.getAttributes().getRetAttrs()));
6397 for (
auto &Arg :
F.args())
6399 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6401 bool AddingAttrs =
false, RemovingAttrs =
false;
6402 AttrBuilder AttrsToAdd(
F.getContext());
6407 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
6408 A.isValid() &&
A.isStringAttribute()) {
6409 F.setSection(
A.getValueAsString());
6411 RemovingAttrs =
true;
6415 A.isValid() &&
A.isStringAttribute()) {
6418 AddingAttrs = RemovingAttrs =
true;
6421 if (
Attribute A =
F.getFnAttribute(
"uniform-work-group-size");
6422 A.isValid() &&
A.isStringAttribute() && !
A.getValueAsString().empty()) {
6424 RemovingAttrs =
true;
6425 if (
A.getValueAsString() ==
"true") {
6426 AttrsToAdd.addAttribute(
"uniform-work-group-size");
6435 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
6438 if (
A.getValueAsBool()) {
6439 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6445 AttrsToRemove.
addAttribute(
"amdgpu-unsafe-fp-atomics");
6446 RemovingAttrs =
true;
6453 bool HandleDenormalMode =
false;
6455 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math"); Attr.isValid()) {
6458 DenormalFPMath = ParsedMode;
6460 AddingAttrs = RemovingAttrs =
true;
6461 HandleDenormalMode =
true;
6465 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math-f32");
6469 DenormalFPMathF32 = ParsedMode;
6471 AddingAttrs = RemovingAttrs =
true;
6472 HandleDenormalMode =
true;
6476 if (HandleDenormalMode)
6477 AttrsToAdd.addDenormalFPEnvAttr(
6481 F.removeFnAttrs(AttrsToRemove);
6484 F.addFnAttrs(AttrsToAdd);
6490 if (!
F.hasFnAttribute(FnAttrName))
6491 F.addFnAttr(FnAttrName,
Value);
6498 if (!
F.hasFnAttribute(FnAttrName)) {
6500 F.addFnAttr(FnAttrName);
6502 auto A =
F.getFnAttribute(FnAttrName);
6503 if (
"false" ==
A.getValueAsString())
6504 F.removeFnAttr(FnAttrName);
6505 else if (
"true" ==
A.getValueAsString()) {
6506 F.removeFnAttr(FnAttrName);
6507 F.addFnAttr(FnAttrName);
6513 Triple T(M.getTargetTriple());
6514 if (!
T.isThumb() && !
T.isARM() && !
T.isAArch64())
6524 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6528 if (
Op->getNumOperands() != 3)
6537 uint64_t *ValPtr = IDStr ==
"branch-target-enforcement" ? &BTEValue
6538 : IDStr ==
"branch-protection-pauth-lr" ? &BPPLRValue
6539 : IDStr ==
"guarded-control-stack" ? &GCSValue
6540 : IDStr ==
"sign-return-address" ? &SRAValue
6541 : IDStr ==
"sign-return-address-all" ? &SRAALLValue
6542 : IDStr ==
"sign-return-address-with-bkey"
6548 *ValPtr = CI->getZExtValue();
6554 bool BTE = BTEValue == 1;
6555 bool BPPLR = BPPLRValue == 1;
6556 bool GCS = GCSValue == 1;
6557 bool SRA = SRAValue == 1;
6560 if (SRA && SRAALLValue == 1)
6561 SignTypeValue =
"all";
6564 if (SRA && SRABKeyValue == 1)
6565 SignKeyValue =
"b_key";
6567 for (
Function &
F : M.getFunctionList()) {
6568 if (
F.isDeclaration())
6575 if (
auto A =
F.getFnAttribute(
"sign-return-address");
6576 A.isValid() &&
"none" ==
A.getValueAsString()) {
6577 F.removeFnAttr(
"sign-return-address");
6578 F.removeFnAttr(
"sign-return-address-key");
6594 if (SRAALLValue == 1)
6596 if (SRABKeyValue == 1)
6605 if (
T->getNumOperands() < 1)
6610 return S->getString().starts_with(
"llvm.vectorizer.");
6614 StringRef OldPrefix =
"llvm.vectorizer.";
6617 if (OldTag ==
"llvm.vectorizer.unroll")
6629 if (
T->getNumOperands() < 1)
6634 if (!OldTag->getString().starts_with(
"llvm.vectorizer."))
6639 Ops.reserve(
T->getNumOperands());
6641 for (
unsigned I = 1,
E =
T->getNumOperands();
I !=
E; ++
I)
6642 Ops.push_back(
T->getOperand(
I));
6656 Ops.reserve(
T->getNumOperands());
6667 if ((
T.isSPIR() || (
T.isSPIRV() && !
T.isSPIRVLogical())) &&
6668 !
DL.contains(
"-G") && !
DL.starts_with(
"G")) {
6669 return DL.empty() ? std::string(
"G1") : (
DL +
"-G1").str();
6672 if (
T.isLoongArch64() ||
T.isRISCV64()) {
6674 auto I =
DL.find(
"-n64-");
6676 return (
DL.take_front(
I) +
"-n32:64-" +
DL.drop_front(
I + 5)).str();
6681 std::string Res =
DL.str();
6684 if (!
DL.contains(
"-G") && !
DL.starts_with(
"G"))
6685 Res.append(Res.empty() ?
"G1" :
"-G1");
6693 if (!
DL.contains(
"-ni") && !
DL.starts_with(
"ni"))
6694 Res.append(
"-ni:7:8:9");
6696 if (
DL.ends_with(
"ni:7"))
6698 if (
DL.ends_with(
"ni:7:8"))
6703 if (!
DL.contains(
"-p7") && !
DL.starts_with(
"p7"))
6704 Res.append(
"-p7:160:256:256:32");
6705 if (!
DL.contains(
"-p8") && !
DL.starts_with(
"p8"))
6706 Res.append(
"-p8:128:128:128:48");
6707 constexpr StringRef OldP8(
"-p8:128:128-");
6708 if (
DL.contains(OldP8))
6709 Res.replace(Res.find(OldP8), OldP8.
size(),
"-p8:128:128:128:48-");
6710 if (!
DL.contains(
"-p9") && !
DL.starts_with(
"p9"))
6711 Res.append(
"-p9:192:256:256:32");
6715 if (!
DL.contains(
"m:e"))
6716 Res = Res.empty() ?
"m:e" :
"m:e-" + Res;
6721 if (
T.isSystemZ() && !
DL.empty()) {
6723 if (!
DL.contains(
"-S64"))
6724 return "E-S64" +
DL.drop_front(1).str();
6728 auto AddPtr32Ptr64AddrSpaces = [&
DL, &Res]() {
6731 StringRef AddrSpaces{
"-p270:32:32-p271:32:32-p272:64:64"};
6732 if (!
DL.contains(AddrSpaces)) {
6734 Regex R(
"^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6735 if (R.match(Res, &
Groups))
6741 if (
T.isAArch64()) {
6743 if (!
DL.empty() && !
DL.contains(
"-Fn32"))
6744 Res.append(
"-Fn32");
6745 AddPtr32Ptr64AddrSpaces();
6749 if (
T.isSPARC() || (
T.isMIPS64() && !
DL.contains(
"m:m")) ||
T.isPPC64() ||
6753 std::string I64 =
"-i64:64";
6754 std::string I128 =
"-i128:128";
6756 size_t Pos = Res.find(I64);
6757 if (Pos !=
size_t(-1))
6758 Res.insert(Pos + I64.size(), I128);
6762 if (
T.isPPC() &&
T.isOSAIX() && !
DL.contains(
"f64:32:64") && !
DL.empty()) {
6763 size_t Pos = Res.find(
"-S128");
6766 Res.insert(Pos,
"-f64:32:64");
6772 AddPtr32Ptr64AddrSpaces();
6780 if (!
T.isOSIAMCU()) {
6781 std::string I128 =
"-i128:128";
6784 Regex R(
"^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6785 if (R.match(Res, &
Groups))
6793 if (
T.isWindowsMSVCEnvironment() && !
T.isArch64Bit()) {
6795 auto I =
Ref.find(
"-f80:32-");
6797 Res = (
Ref.take_front(
I) +
"-f80:128-" +
Ref.drop_front(
I + 8)).str();
6805 Attribute A =
B.getAttribute(
"no-frame-pointer-elim");
6808 FramePointer =
A.getValueAsString() ==
"true" ?
"all" :
"none";
6809 B.removeAttribute(
"no-frame-pointer-elim");
6811 if (
B.contains(
"no-frame-pointer-elim-non-leaf")) {
6813 if (FramePointer !=
"all")
6814 FramePointer =
"non-leaf";
6815 B.removeAttribute(
"no-frame-pointer-elim-non-leaf");
6817 if (!FramePointer.
empty())
6818 B.addAttribute(
"frame-pointer", FramePointer);
6820 A =
B.getAttribute(
"null-pointer-is-valid");
6823 bool NullPointerIsValid =
A.getValueAsString() ==
"true";
6824 B.removeAttribute(
"null-pointer-is-valid");
6825 if (NullPointerIsValid)
6826 B.addAttribute(Attribute::NullPointerIsValid);
6829 A =
B.getAttribute(
"uniform-work-group-size");
6833 bool IsTrue = Val ==
"true";
6834 B.removeAttribute(
"uniform-work-group-size");
6836 B.addAttribute(
"uniform-work-group-size");
6847 return OBD.
getTag() ==
"clang.arc.attachedcall" &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
void setDebugLoc(DebugLoc Loc)
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
LLVM_ABI SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
LLVM_ABI bool hasStructReturnType(ID id)
Returns true if id has a struct return type.
@ ADDRESS_SPACE_SHARED_CLUSTER
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ Default
The result value is uniform if and only if all operands are uniform.
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.