34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
63 cl::desc(
"Disable autoupgrade of debug info"));
82 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
97 Type *LastArgType =
F->getFunctionType()->getParamType(
98 F->getFunctionType()->getNumParams() - 1);
113 if (
F->getReturnType()->isVectorTy())
126 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
127 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
144 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
145 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
159 if (
F->getReturnType()->getScalarType()->isBFloatTy())
169 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
183 if (Name.consume_front(
"avx."))
184 return (Name.starts_with(
"blend.p") ||
185 Name ==
"cvt.ps2.pd.256" ||
186 Name ==
"cvtdq2.pd.256" ||
187 Name ==
"cvtdq2.ps.256" ||
188 Name.starts_with(
"movnt.") ||
189 Name.starts_with(
"sqrt.p") ||
190 Name.starts_with(
"storeu.") ||
191 Name.starts_with(
"vbroadcast.s") ||
192 Name.starts_with(
"vbroadcastf128") ||
193 Name.starts_with(
"vextractf128.") ||
194 Name.starts_with(
"vinsertf128.") ||
195 Name.starts_with(
"vperm2f128.") ||
196 Name.starts_with(
"vpermil."));
198 if (Name.consume_front(
"avx2."))
199 return (Name ==
"movntdqa" ||
200 Name.starts_with(
"pabs.") ||
201 Name.starts_with(
"padds.") ||
202 Name.starts_with(
"paddus.") ||
203 Name.starts_with(
"pblendd.") ||
205 Name.starts_with(
"pbroadcast") ||
206 Name.starts_with(
"pcmpeq.") ||
207 Name.starts_with(
"pcmpgt.") ||
208 Name.starts_with(
"pmax") ||
209 Name.starts_with(
"pmin") ||
210 Name.starts_with(
"pmovsx") ||
211 Name.starts_with(
"pmovzx") ||
213 Name ==
"pmulu.dq" ||
214 Name.starts_with(
"psll.dq") ||
215 Name.starts_with(
"psrl.dq") ||
216 Name.starts_with(
"psubs.") ||
217 Name.starts_with(
"psubus.") ||
218 Name.starts_with(
"vbroadcast") ||
219 Name ==
"vbroadcasti128" ||
220 Name ==
"vextracti128" ||
221 Name ==
"vinserti128" ||
222 Name ==
"vperm2i128");
224 if (Name.consume_front(
"avx512.")) {
225 if (Name.consume_front(
"mask."))
227 return (Name.starts_with(
"add.p") ||
228 Name.starts_with(
"and.") ||
229 Name.starts_with(
"andn.") ||
230 Name.starts_with(
"broadcast.s") ||
231 Name.starts_with(
"broadcastf32x4.") ||
232 Name.starts_with(
"broadcastf32x8.") ||
233 Name.starts_with(
"broadcastf64x2.") ||
234 Name.starts_with(
"broadcastf64x4.") ||
235 Name.starts_with(
"broadcasti32x4.") ||
236 Name.starts_with(
"broadcasti32x8.") ||
237 Name.starts_with(
"broadcasti64x2.") ||
238 Name.starts_with(
"broadcasti64x4.") ||
239 Name.starts_with(
"cmp.b") ||
240 Name.starts_with(
"cmp.d") ||
241 Name.starts_with(
"cmp.q") ||
242 Name.starts_with(
"cmp.w") ||
243 Name.starts_with(
"compress.b") ||
244 Name.starts_with(
"compress.d") ||
245 Name.starts_with(
"compress.p") ||
246 Name.starts_with(
"compress.q") ||
247 Name.starts_with(
"compress.store.") ||
248 Name.starts_with(
"compress.w") ||
249 Name.starts_with(
"conflict.") ||
250 Name.starts_with(
"cvtdq2pd.") ||
251 Name.starts_with(
"cvtdq2ps.") ||
252 Name ==
"cvtpd2dq.256" ||
253 Name ==
"cvtpd2ps.256" ||
254 Name ==
"cvtps2pd.128" ||
255 Name ==
"cvtps2pd.256" ||
256 Name.starts_with(
"cvtqq2pd.") ||
257 Name ==
"cvtqq2ps.256" ||
258 Name ==
"cvtqq2ps.512" ||
259 Name ==
"cvttpd2dq.256" ||
260 Name ==
"cvttps2dq.128" ||
261 Name ==
"cvttps2dq.256" ||
262 Name.starts_with(
"cvtudq2pd.") ||
263 Name.starts_with(
"cvtudq2ps.") ||
264 Name.starts_with(
"cvtuqq2pd.") ||
265 Name ==
"cvtuqq2ps.256" ||
266 Name ==
"cvtuqq2ps.512" ||
267 Name.starts_with(
"dbpsadbw.") ||
268 Name.starts_with(
"div.p") ||
269 Name.starts_with(
"expand.b") ||
270 Name.starts_with(
"expand.d") ||
271 Name.starts_with(
"expand.load.") ||
272 Name.starts_with(
"expand.p") ||
273 Name.starts_with(
"expand.q") ||
274 Name.starts_with(
"expand.w") ||
275 Name.starts_with(
"fpclass.p") ||
276 Name.starts_with(
"insert") ||
277 Name.starts_with(
"load.") ||
278 Name.starts_with(
"loadu.") ||
279 Name.starts_with(
"lzcnt.") ||
280 Name.starts_with(
"max.p") ||
281 Name.starts_with(
"min.p") ||
282 Name.starts_with(
"movddup") ||
283 Name.starts_with(
"move.s") ||
284 Name.starts_with(
"movshdup") ||
285 Name.starts_with(
"movsldup") ||
286 Name.starts_with(
"mul.p") ||
287 Name.starts_with(
"or.") ||
288 Name.starts_with(
"pabs.") ||
289 Name.starts_with(
"packssdw.") ||
290 Name.starts_with(
"packsswb.") ||
291 Name.starts_with(
"packusdw.") ||
292 Name.starts_with(
"packuswb.") ||
293 Name.starts_with(
"padd.") ||
294 Name.starts_with(
"padds.") ||
295 Name.starts_with(
"paddus.") ||
296 Name.starts_with(
"palignr.") ||
297 Name.starts_with(
"pand.") ||
298 Name.starts_with(
"pandn.") ||
299 Name.starts_with(
"pavg") ||
300 Name.starts_with(
"pbroadcast") ||
301 Name.starts_with(
"pcmpeq.") ||
302 Name.starts_with(
"pcmpgt.") ||
303 Name.starts_with(
"perm.df.") ||
304 Name.starts_with(
"perm.di.") ||
305 Name.starts_with(
"permvar.") ||
306 Name.starts_with(
"pmaddubs.w.") ||
307 Name.starts_with(
"pmaddw.d.") ||
308 Name.starts_with(
"pmax") ||
309 Name.starts_with(
"pmin") ||
310 Name ==
"pmov.qd.256" ||
311 Name ==
"pmov.qd.512" ||
312 Name ==
"pmov.wb.256" ||
313 Name ==
"pmov.wb.512" ||
314 Name.starts_with(
"pmovsx") ||
315 Name.starts_with(
"pmovzx") ||
316 Name.starts_with(
"pmul.dq.") ||
317 Name.starts_with(
"pmul.hr.sw.") ||
318 Name.starts_with(
"pmulh.w.") ||
319 Name.starts_with(
"pmulhu.w.") ||
320 Name.starts_with(
"pmull.") ||
321 Name.starts_with(
"pmultishift.qb.") ||
322 Name.starts_with(
"pmulu.dq.") ||
323 Name.starts_with(
"por.") ||
324 Name.starts_with(
"prol.") ||
325 Name.starts_with(
"prolv.") ||
326 Name.starts_with(
"pror.") ||
327 Name.starts_with(
"prorv.") ||
328 Name.starts_with(
"pshuf.b.") ||
329 Name.starts_with(
"pshuf.d.") ||
330 Name.starts_with(
"pshufh.w.") ||
331 Name.starts_with(
"pshufl.w.") ||
332 Name.starts_with(
"psll.d") ||
333 Name.starts_with(
"psll.q") ||
334 Name.starts_with(
"psll.w") ||
335 Name.starts_with(
"pslli") ||
336 Name.starts_with(
"psllv") ||
337 Name.starts_with(
"psra.d") ||
338 Name.starts_with(
"psra.q") ||
339 Name.starts_with(
"psra.w") ||
340 Name.starts_with(
"psrai") ||
341 Name.starts_with(
"psrav") ||
342 Name.starts_with(
"psrl.d") ||
343 Name.starts_with(
"psrl.q") ||
344 Name.starts_with(
"psrl.w") ||
345 Name.starts_with(
"psrli") ||
346 Name.starts_with(
"psrlv") ||
347 Name.starts_with(
"psub.") ||
348 Name.starts_with(
"psubs.") ||
349 Name.starts_with(
"psubus.") ||
350 Name.starts_with(
"pternlog.") ||
351 Name.starts_with(
"punpckh") ||
352 Name.starts_with(
"punpckl") ||
353 Name.starts_with(
"pxor.") ||
354 Name.starts_with(
"shuf.f") ||
355 Name.starts_with(
"shuf.i") ||
356 Name.starts_with(
"shuf.p") ||
357 Name.starts_with(
"sqrt.p") ||
358 Name.starts_with(
"store.b.") ||
359 Name.starts_with(
"store.d.") ||
360 Name.starts_with(
"store.p") ||
361 Name.starts_with(
"store.q.") ||
362 Name.starts_with(
"store.w.") ||
363 Name ==
"store.ss" ||
364 Name.starts_with(
"storeu.") ||
365 Name.starts_with(
"sub.p") ||
366 Name.starts_with(
"ucmp.") ||
367 Name.starts_with(
"unpckh.") ||
368 Name.starts_with(
"unpckl.") ||
369 Name.starts_with(
"valign.") ||
370 Name ==
"vcvtph2ps.128" ||
371 Name ==
"vcvtph2ps.256" ||
372 Name.starts_with(
"vextract") ||
373 Name.starts_with(
"vfmadd.") ||
374 Name.starts_with(
"vfmaddsub.") ||
375 Name.starts_with(
"vfnmadd.") ||
376 Name.starts_with(
"vfnmsub.") ||
377 Name.starts_with(
"vpdpbusd.") ||
378 Name.starts_with(
"vpdpbusds.") ||
379 Name.starts_with(
"vpdpwssd.") ||
380 Name.starts_with(
"vpdpwssds.") ||
381 Name.starts_with(
"vpermi2var.") ||
382 Name.starts_with(
"vpermil.p") ||
383 Name.starts_with(
"vpermilvar.") ||
384 Name.starts_with(
"vpermt2var.") ||
385 Name.starts_with(
"vpmadd52") ||
386 Name.starts_with(
"vpshld.") ||
387 Name.starts_with(
"vpshldv.") ||
388 Name.starts_with(
"vpshrd.") ||
389 Name.starts_with(
"vpshrdv.") ||
390 Name.starts_with(
"vpshufbitqmb.") ||
391 Name.starts_with(
"xor."));
393 if (Name.consume_front(
"mask3."))
395 return (Name.starts_with(
"vfmadd.") ||
396 Name.starts_with(
"vfmaddsub.") ||
397 Name.starts_with(
"vfmsub.") ||
398 Name.starts_with(
"vfmsubadd.") ||
399 Name.starts_with(
"vfnmsub."));
401 if (Name.consume_front(
"maskz."))
403 return (Name.starts_with(
"pternlog.") ||
404 Name.starts_with(
"vfmadd.") ||
405 Name.starts_with(
"vfmaddsub.") ||
406 Name.starts_with(
"vpdpbusd.") ||
407 Name.starts_with(
"vpdpbusds.") ||
408 Name.starts_with(
"vpdpwssd.") ||
409 Name.starts_with(
"vpdpwssds.") ||
410 Name.starts_with(
"vpermt2var.") ||
411 Name.starts_with(
"vpmadd52") ||
412 Name.starts_with(
"vpshldv.") ||
413 Name.starts_with(
"vpshrdv."));
416 return (Name ==
"movntdqa" ||
417 Name ==
"pmul.dq.512" ||
418 Name ==
"pmulu.dq.512" ||
419 Name.starts_with(
"broadcastm") ||
420 Name.starts_with(
"cmp.p") ||
421 Name.starts_with(
"cvtb2mask.") ||
422 Name.starts_with(
"cvtd2mask.") ||
423 Name.starts_with(
"cvtmask2") ||
424 Name.starts_with(
"cvtq2mask.") ||
425 Name ==
"cvtusi2sd" ||
426 Name.starts_with(
"cvtw2mask.") ||
431 Name ==
"kortestc.w" ||
432 Name ==
"kortestz.w" ||
433 Name.starts_with(
"kunpck") ||
436 Name.starts_with(
"padds.") ||
437 Name.starts_with(
"pbroadcast") ||
438 Name.starts_with(
"prol") ||
439 Name.starts_with(
"pror") ||
440 Name.starts_with(
"psll.dq") ||
441 Name.starts_with(
"psrl.dq") ||
442 Name.starts_with(
"psubs.") ||
443 Name.starts_with(
"ptestm") ||
444 Name.starts_with(
"ptestnm") ||
445 Name.starts_with(
"storent.") ||
446 Name.starts_with(
"vbroadcast.s") ||
447 Name.starts_with(
"vpshld.") ||
448 Name.starts_with(
"vpshrd."));
451 if (Name.consume_front(
"fma."))
452 return (Name.starts_with(
"vfmadd.") ||
453 Name.starts_with(
"vfmsub.") ||
454 Name.starts_with(
"vfmsubadd.") ||
455 Name.starts_with(
"vfnmadd.") ||
456 Name.starts_with(
"vfnmsub."));
458 if (Name.consume_front(
"fma4."))
459 return Name.starts_with(
"vfmadd.s");
461 if (Name.consume_front(
"sse."))
462 return (Name ==
"add.ss" ||
463 Name ==
"cvtsi2ss" ||
464 Name ==
"cvtsi642ss" ||
467 Name.starts_with(
"sqrt.p") ||
469 Name.starts_with(
"storeu.") ||
472 if (Name.consume_front(
"sse2."))
473 return (Name ==
"add.sd" ||
474 Name ==
"cvtdq2pd" ||
475 Name ==
"cvtdq2ps" ||
476 Name ==
"cvtps2pd" ||
477 Name ==
"cvtsi2sd" ||
478 Name ==
"cvtsi642sd" ||
479 Name ==
"cvtss2sd" ||
482 Name.starts_with(
"padds.") ||
483 Name.starts_with(
"paddus.") ||
484 Name.starts_with(
"pcmpeq.") ||
485 Name.starts_with(
"pcmpgt.") ||
490 Name ==
"pmulu.dq" ||
491 Name.starts_with(
"pshuf") ||
492 Name.starts_with(
"psll.dq") ||
493 Name.starts_with(
"psrl.dq") ||
494 Name.starts_with(
"psubs.") ||
495 Name.starts_with(
"psubus.") ||
496 Name.starts_with(
"sqrt.p") ||
498 Name ==
"storel.dq" ||
499 Name.starts_with(
"storeu.") ||
502 if (Name.consume_front(
"sse41."))
503 return (Name.starts_with(
"blendp") ||
504 Name ==
"movntdqa" ||
514 Name.starts_with(
"pmovsx") ||
515 Name.starts_with(
"pmovzx") ||
518 if (Name.consume_front(
"sse42."))
519 return Name ==
"crc32.64.8";
521 if (Name.consume_front(
"sse4a."))
522 return Name.starts_with(
"movnt.");
524 if (Name.consume_front(
"ssse3."))
525 return (Name ==
"pabs.b.128" ||
526 Name ==
"pabs.d.128" ||
527 Name ==
"pabs.w.128");
529 if (Name.consume_front(
"xop."))
530 return (Name ==
"vpcmov" ||
531 Name ==
"vpcmov.256" ||
532 Name.starts_with(
"vpcom") ||
533 Name.starts_with(
"vprot"));
535 return (Name ==
"addcarry.u32" ||
536 Name ==
"addcarry.u64" ||
537 Name ==
"addcarryx.u32" ||
538 Name ==
"addcarryx.u64" ||
539 Name ==
"subborrow.u32" ||
540 Name ==
"subborrow.u64" ||
541 Name.starts_with(
"vcvtph2ps."));
547 if (!Name.consume_front(
"x86."))
555 if (Name ==
"rdtscp") {
557 if (
F->getFunctionType()->getNumParams() == 0)
562 Intrinsic::x86_rdtscp);
569 if (Name.consume_front(
"sse41.ptest")) {
571 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
572 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
573 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
586 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
587 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
588 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
589 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
590 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
591 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
596 if (Name.consume_front(
"avx512.")) {
597 if (Name.consume_front(
"mask.cmp.")) {
600 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
609 }
else if (Name.starts_with(
"vpdpbusd.") ||
610 Name.starts_with(
"vpdpbusds.")) {
613 .
Case(
"vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
614 .
Case(
"vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
615 .
Case(
"vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
616 .
Case(
"vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
617 .
Case(
"vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
618 .
Case(
"vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
622 }
else if (Name.starts_with(
"vpdpwssd.") ||
623 Name.starts_with(
"vpdpwssds.")) {
626 .
Case(
"vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
627 .
Case(
"vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
628 .
Case(
"vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
629 .
Case(
"vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
630 .
Case(
"vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
631 .
Case(
"vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
639 if (Name.consume_front(
"avx2.")) {
640 if (Name.consume_front(
"vpdpb")) {
643 .
Case(
"ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
644 .
Case(
"ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
645 .
Case(
"ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
646 .
Case(
"ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
647 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
648 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
649 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
650 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
651 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
652 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
653 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
654 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
658 }
else if (Name.consume_front(
"vpdpw")) {
661 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
662 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
663 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
664 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
665 .
Case(
"usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
666 .
Case(
"usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
667 .
Case(
"usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
668 .
Case(
"usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
669 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
670 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
671 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
672 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
680 if (Name.consume_front(
"avx10.")) {
681 if (Name.consume_front(
"vpdpb")) {
684 .
Case(
"ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
685 .
Case(
"ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
686 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
687 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
688 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
689 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
693 }
else if (Name.consume_front(
"vpdpw")) {
695 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
696 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
697 .
Case(
"usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
698 .
Case(
"usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
699 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
700 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
708 if (Name.consume_front(
"avx512bf16.")) {
711 .
Case(
"cvtne2ps2bf16.128",
712 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .
Case(
"cvtne2ps2bf16.256",
714 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .
Case(
"cvtne2ps2bf16.512",
716 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .
Case(
"mask.cvtneps2bf16.128",
718 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .
Case(
"cvtneps2bf16.256",
720 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .
Case(
"cvtneps2bf16.512",
722 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
729 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
738 if (Name.consume_front(
"xop.")) {
740 if (Name.starts_with(
"vpermil2")) {
743 auto Idx =
F->getFunctionType()->getParamType(2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
756 }
else if (
F->arg_size() == 2)
759 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
760 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
771 if (Name ==
"seh.recoverfp") {
773 Intrinsic::eh_recoverfp);
785 if (Name.starts_with(
"rbit")) {
788 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
792 if (Name ==
"thread.pointer") {
795 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
799 bool Neon = Name.consume_front(
"neon.");
804 if (Name.consume_front(
"bfdot.")) {
808 .
Cases({
"v2f32.v8i8",
"v4f32.v16i8"},
813 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
814 assert((OperandWidth == 64 || OperandWidth == 128) &&
815 "Unexpected operand width");
817 std::array<Type *, 2> Tys{
828 if (Name.consume_front(
"bfm")) {
830 if (Name.consume_back(
".v4f32.v16i8")) {
876 F->arg_begin()->getType());
880 if (Name.consume_front(
"vst")) {
882 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
886 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
887 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
890 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
891 Intrinsic::arm_neon_vst4lane};
893 auto fArgs =
F->getFunctionType()->params();
894 Type *Tys[] = {fArgs[0], fArgs[1]};
897 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
900 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
909 if (Name.consume_front(
"mve.")) {
911 if (Name ==
"vctp64") {
921 if (Name.starts_with(
"vrintn.v")) {
923 F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
928 if (Name.consume_back(
".v4i1")) {
930 if (Name.consume_back(
".predicated.v2i64.v4i32"))
932 return Name ==
"mull.int" || Name ==
"vqdmull";
934 if (Name.consume_back(
".v2i64")) {
936 bool IsGather = Name.consume_front(
"vldr.gather.");
937 if (IsGather || Name.consume_front(
"vstr.scatter.")) {
938 if (Name.consume_front(
"base.")) {
940 Name.consume_front(
"wb.");
943 return Name ==
"predicated.v2i64";
946 if (Name.consume_front(
"offset.predicated."))
947 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
948 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
961 if (Name.consume_front(
"cde.vcx")) {
963 if (Name.consume_back(
".predicated.v2i64.v4i1"))
965 return Name ==
"1q" || Name ==
"1qa" || Name ==
"2q" || Name ==
"2qa" ||
966 Name ==
"3q" || Name ==
"3qa";
980 F->arg_begin()->getType());
984 if (Name.starts_with(
"addp")) {
986 if (
F->arg_size() != 2)
989 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
991 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
997 if (Name.starts_with(
"bfcvt")) {
1004 if (Name.consume_front(
"sve.")) {
1006 if (Name.consume_front(
"bf")) {
1007 if (Name ==
"mmla") {
1008 Type *Tys[] = {
F->getReturnType(),
1009 std::next(
F->arg_begin())->getType()};
1011 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1014 if (Name.consume_back(
".lane")) {
1018 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1019 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1020 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1032 if (Name ==
"fcvt.bf16f32" || Name ==
"fcvtnt.bf16f32") {
1037 if (Name.consume_front(
"addqv")) {
1039 if (!
F->getReturnType()->isFPOrFPVectorTy())
1042 auto Args =
F->getFunctionType()->params();
1043 Type *Tys[] = {
F->getReturnType(), Args[1]};
1045 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1049 if (Name.consume_front(
"ld")) {
1051 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
1052 if (LdRegex.
match(Name)) {
1058 "Expected 2 arguments for ld* intrinsic.");
1059 Type *PtrTy =
F->getArg(1)->getType();
1062 Intrinsic::aarch64_sve_ld2_sret,
1063 Intrinsic::aarch64_sve_ld3_sret,
1064 Intrinsic::aarch64_sve_ld4_sret,
1067 F->getParent(), LoadIDs[Name[0] -
'2'], {Ty, PtrTy});
1073 if (Name.consume_front(
"tuple.")) {
1075 if (Name.starts_with(
"get")) {
1077 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
1079 F->getParent(), Intrinsic::vector_extract, Tys);
1083 if (Name.starts_with(
"set")) {
1085 auto Args =
F->getFunctionType()->params();
1086 Type *Tys[] = {Args[0], Args[2], Args[1]};
1088 F->getParent(), Intrinsic::vector_insert, Tys);
1092 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
1093 if (CreateTupleRegex.
match(Name)) {
1095 auto Args =
F->getFunctionType()->params();
1096 Type *Tys[] = {
F->getReturnType(), Args[1]};
1098 F->getParent(), Intrinsic::vector_insert, Tys);
1104 if (Name.starts_with(
"rev.nxv")) {
1107 F->getParent(), Intrinsic::vector_reverse,
F->getReturnType());
1119 if (Name.consume_front(
"cp.async.bulk.tensor.g2s.")) {
1123 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1125 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1127 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1128 .
Case(
"tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1129 .
Case(
"tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1130 .
Case(
"tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1131 .
Case(
"tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1132 .
Case(
"tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1141 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1155 size_t FlagStartIndex =
F->getFunctionType()->getNumParams() - 3;
1156 Type *ArgType =
F->getFunctionType()->getParamType(FlagStartIndex);
1166 if (Name.consume_front(
"mapa.shared.cluster"))
1167 if (
F->getReturnType()->getPointerAddressSpace() ==
1169 return Intrinsic::nvvm_mapa_shared_cluster;
1171 if (Name.consume_front(
"cp.async.bulk.")) {
1174 .
Case(
"global.to.shared.cluster",
1175 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1176 .
Case(
"shared.cta.to.cluster",
1177 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1181 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1190 if (Name.consume_front(
"fma.rn."))
1192 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
1193 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1194 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1195 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1198 if (Name.consume_front(
"fmax."))
1200 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
1201 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1202 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1203 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1204 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1205 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1206 .
Case(
"ftz.nan.xorsign.abs.bf16",
1207 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1208 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1209 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1210 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1211 .
Case(
"ftz.xorsign.abs.bf16x2",
1212 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1213 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1214 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1215 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1216 .
Case(
"nan.xorsign.abs.bf16x2",
1217 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1218 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1219 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1222 if (Name.consume_front(
"fmin."))
1224 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
1225 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1226 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1227 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1228 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1229 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1230 .
Case(
"ftz.nan.xorsign.abs.bf16",
1231 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1232 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1233 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1234 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1235 .
Case(
"ftz.xorsign.abs.bf16x2",
1236 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1237 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1238 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1239 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1240 .
Case(
"nan.xorsign.abs.bf16x2",
1241 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1242 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1243 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1246 if (Name.consume_front(
"neg."))
1248 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1249 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1256 return Name.consume_front(
"local") || Name.consume_front(
"shared") ||
1257 Name.consume_front(
"global") || Name.consume_front(
"constant") ||
1258 Name.consume_front(
"param");
1264 if (Name.starts_with(
"to.fp16")) {
1268 FuncTy->getReturnType());
1271 if (Name.starts_with(
"from.fp16")) {
1275 FuncTy->getReturnType());
1282 bool CanUpgradeDebugIntrinsicsToRecords) {
1283 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1288 if (!Name.consume_front(
"llvm.") || Name.empty())
1294 bool IsArm = Name.consume_front(
"arm.");
1295 if (IsArm || Name.consume_front(
"aarch64.")) {
1301 if (Name.consume_front(
"amdgcn.")) {
1302 if (Name ==
"alignbit") {
1305 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1309 if (Name.consume_front(
"atomic.")) {
1310 if (Name.starts_with(
"inc") || Name.starts_with(
"dec") ||
1311 Name.starts_with(
"cond.sub") || Name.starts_with(
"csub")) {
1320 switch (
F->getIntrinsicID()) {
1324 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1325 if (
F->arg_size() == 7) {
1330 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1331 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1332 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1333 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1334 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1335 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1336 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1337 if (
F->arg_size() == 8) {
1344 if (Name.consume_front(
"ds.") || Name.consume_front(
"global.atomic.") ||
1345 Name.consume_front(
"flat.atomic.")) {
1346 if (Name.starts_with(
"fadd") ||
1348 (Name.starts_with(
"fmin") && !Name.starts_with(
"fmin.num")) ||
1349 (Name.starts_with(
"fmax") && !Name.starts_with(
"fmax.num"))) {
1357 if (Name.starts_with(
"ldexp.")) {
1360 F->getParent(), Intrinsic::ldexp,
1361 {F->getReturnType(), F->getArg(1)->getType()});
1370 if (
F->arg_size() == 1) {
1371 if (Name.consume_front(
"convert.")) {
1385 F->arg_begin()->getType());
1390 if (
F->arg_size() == 2 && Name ==
"coro.end") {
1393 Intrinsic::coro_end);
1400 if (Name.consume_front(
"dbg.")) {
1402 if (CanUpgradeDebugIntrinsicsToRecords) {
1403 if (Name ==
"addr" || Name ==
"value" || Name ==
"assign" ||
1404 Name ==
"declare" || Name ==
"label") {
1413 if (Name ==
"addr" || (Name ==
"value" &&
F->arg_size() == 4)) {
1416 Intrinsic::dbg_value);
1423 if (Name.consume_front(
"experimental.vector.")) {
1429 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1430 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1431 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1432 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1433 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1435 Intrinsic::vector_partial_reduce_add)
1438 const auto *FT =
F->getFunctionType();
1440 if (
ID == Intrinsic::vector_extract ||
1441 ID == Intrinsic::vector_interleave2)
1444 if (
ID != Intrinsic::vector_interleave2)
1446 if (
ID == Intrinsic::vector_insert ||
1447 ID == Intrinsic::vector_partial_reduce_add)
1455 if (Name.consume_front(
"reduce.")) {
1457 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1458 if (R.match(Name, &
Groups))
1460 .
Case(
"add", Intrinsic::vector_reduce_add)
1461 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1462 .
Case(
"and", Intrinsic::vector_reduce_and)
1463 .
Case(
"or", Intrinsic::vector_reduce_or)
1464 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1465 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1466 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1467 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1468 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1469 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1470 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1475 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1480 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1481 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1486 auto Args =
F->getFunctionType()->params();
1488 {Args[V2 ? 1 : 0]});
1494 if (Name.consume_front(
"splice"))
1498 if (Name.consume_front(
"experimental.stepvector.")) {
1502 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1507 if (Name.starts_with(
"flt.rounds")) {
1510 Intrinsic::get_rounding);
1515 if (Name.starts_with(
"invariant.group.barrier")) {
1517 auto Args =
F->getFunctionType()->params();
1518 Type* ObjectPtr[1] = {Args[0]};
1521 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1526 if ((Name.starts_with(
"lifetime.start") ||
1527 Name.starts_with(
"lifetime.end")) &&
1528 F->arg_size() == 2) {
1530 ? Intrinsic::lifetime_start
1531 : Intrinsic::lifetime_end;
1534 F->getArg(0)->getType());
1543 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1544 .StartsWith(
"memmove.", Intrinsic::memmove)
1546 if (
F->arg_size() == 5) {
1550 F->getFunctionType()->params().slice(0, 3);
1556 if (Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1559 const auto *FT =
F->getFunctionType();
1560 Type *ParamTypes[2] = {
1561 FT->getParamType(0),
1565 Intrinsic::memset, ParamTypes);
1571 .
StartsWith(
"masked.load", Intrinsic::masked_load)
1572 .
StartsWith(
"masked.gather", Intrinsic::masked_gather)
1573 .
StartsWith(
"masked.store", Intrinsic::masked_store)
1574 .
StartsWith(
"masked.scatter", Intrinsic::masked_scatter)
1576 if (MaskedID &&
F->arg_size() == 4) {
1578 if (MaskedID == Intrinsic::masked_load ||
1579 MaskedID == Intrinsic::masked_gather) {
1581 F->getParent(), MaskedID,
1582 {F->getReturnType(), F->getArg(0)->getType()});
1586 F->getParent(), MaskedID,
1587 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1593 if (Name.consume_front(
"nvvm.")) {
1595 if (
F->arg_size() == 1) {
1598 .
Cases({
"brev32",
"brev64"}, Intrinsic::bitreverse)
1599 .Case(
"clz.i", Intrinsic::ctlz)
1600 .
Case(
"popc.i", Intrinsic::ctpop)
1604 {F->getReturnType()});
1607 }
else if (
F->arg_size() == 2) {
1610 .
Cases({
"max.s",
"max.i",
"max.ll"}, Intrinsic::smax)
1611 .Cases({
"min.s",
"min.i",
"min.ll"}, Intrinsic::smin)
1612 .Cases({
"max.us",
"max.ui",
"max.ull"}, Intrinsic::umax)
1613 .Cases({
"min.us",
"min.ui",
"min.ull"}, Intrinsic::umin)
1617 {F->getReturnType()});
1623 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1651 bool Expand =
false;
1652 if (Name.consume_front(
"abs."))
1655 Name ==
"i" || Name ==
"ll" || Name ==
"bf16" || Name ==
"bf16x2";
1656 else if (Name.consume_front(
"fabs."))
1658 Expand = Name ==
"f" || Name ==
"ftz.f" || Name ==
"d";
1659 else if (Name.consume_front(
"ex2.approx."))
1662 Name ==
"f" || Name ==
"ftz.f" || Name ==
"d" || Name ==
"f16x2";
1663 else if (Name.consume_front(
"atomic.load."))
1672 else if (Name.consume_front(
"bitcast."))
1675 Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" || Name ==
"d2ll";
1676 else if (Name.consume_front(
"rotate."))
1678 Expand = Name ==
"b32" || Name ==
"b64" || Name ==
"right.b64";
1679 else if (Name.consume_front(
"ptr.gen.to."))
1682 else if (Name.consume_front(
"ptr."))
1685 else if (Name.consume_front(
"ldg.global."))
1687 Expand = (Name.starts_with(
"i.") || Name.starts_with(
"f.") ||
1688 Name.starts_with(
"p."));
1691 .
Case(
"barrier0",
true)
1692 .
Case(
"barrier.n",
true)
1693 .
Case(
"barrier.sync.cnt",
true)
1694 .
Case(
"barrier.sync",
true)
1695 .
Case(
"barrier",
true)
1696 .
Case(
"bar.sync",
true)
1697 .
Case(
"barrier0.popc",
true)
1698 .
Case(
"barrier0.and",
true)
1699 .
Case(
"barrier0.or",
true)
1700 .
Case(
"clz.ll",
true)
1701 .
Case(
"popc.ll",
true)
1703 .
Case(
"swap.lo.hi.b64",
true)
1704 .
Case(
"tanh.approx.f32",
true)
1716 if (Name.starts_with(
"objectsize.")) {
1717 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1718 if (
F->arg_size() == 2 ||
F->arg_size() == 3) {
1721 Intrinsic::objectsize, Tys);
1728 if (Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1731 F->getParent(), Intrinsic::ptr_annotation,
1732 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1738 if (Name.consume_front(
"riscv.")) {
1741 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1742 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1743 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1744 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1747 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1760 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1761 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1770 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1771 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1772 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1773 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1778 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1787 if (Name ==
"clmul.i32" || Name ==
"clmul.i64") {
1789 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1798 if (Name ==
"stackprotectorcheck") {
1805 if (Name ==
"thread.pointer") {
1807 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
1813 if (Name ==
"var.annotation" &&
F->arg_size() == 4) {
1816 F->getParent(), Intrinsic::var_annotation,
1817 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1820 if (Name.consume_front(
"vector.splice")) {
1821 if (Name.starts_with(
".left") || Name.starts_with(
".right"))
1829 if (Name.consume_front(
"wasm.")) {
1832 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1833 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1834 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1839 F->getReturnType());
1843 if (Name.consume_front(
"dot.i8x16.i7x16.")) {
1845 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1847 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1866 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1876 std::string
Name =
F->getName().str();
1879 Name,
F->getParent());
1890 if (Result != std::nullopt) {
1903 bool CanUpgradeDebugIntrinsicsToRecords) {
1923 GV->
getName() ==
"llvm.global_dtors")) ||
1938 unsigned N =
Init->getNumOperands();
1939 std::vector<Constant *> NewCtors(
N);
1940 for (
unsigned i = 0; i !=
N; ++i) {
1943 Ctor->getAggregateElement(1),
1957 unsigned NumElts = ResultTy->getNumElements() * 8;
1961 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1971 for (
unsigned l = 0; l != NumElts; l += 16)
1972 for (
unsigned i = 0; i != 16; ++i) {
1973 unsigned Idx = NumElts + i - Shift;
1975 Idx -= NumElts - 16;
1976 Idxs[l + i] = Idx + l;
1979 Res = Builder.CreateShuffleVector(Res,
Op,
ArrayRef(Idxs, NumElts));
1983 return Builder.CreateBitCast(Res, ResultTy,
"cast");
1991 unsigned NumElts = ResultTy->getNumElements() * 8;
1995 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
2005 for (
unsigned l = 0; l != NumElts; l += 16)
2006 for (
unsigned i = 0; i != 16; ++i) {
2007 unsigned Idx = i + Shift;
2009 Idx += NumElts - 16;
2010 Idxs[l + i] = Idx + l;
2013 Res = Builder.CreateShuffleVector(
Op, Res,
ArrayRef(Idxs, NumElts));
2017 return Builder.CreateBitCast(Res, ResultTy,
"cast");
2025 Mask = Builder.CreateBitCast(Mask, MaskTy);
2031 for (
unsigned i = 0; i != NumElts; ++i)
2033 Mask = Builder.CreateShuffleVector(Mask, Mask,
ArrayRef(Indices, NumElts),
2044 if (
C->isAllOnesValue())
2049 return Builder.CreateSelect(Mask, Op0, Op1);
2056 if (
C->isAllOnesValue())
2060 Mask->getType()->getIntegerBitWidth());
2061 Mask = Builder.CreateBitCast(Mask, MaskTy);
2062 Mask = Builder.CreateExtractElement(Mask, (
uint64_t)0);
2063 return Builder.CreateSelect(Mask, Op0, Op1);
2076 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
2077 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
2082 ShiftVal &= (NumElts - 1);
2091 if (ShiftVal > 16) {
2099 for (
unsigned l = 0; l < NumElts; l += 16) {
2100 for (
unsigned i = 0; i != 16; ++i) {
2101 unsigned Idx = ShiftVal + i;
2102 if (!IsVALIGN && Idx >= 16)
2103 Idx += NumElts - 16;
2104 Indices[l + i] = Idx + l;
2109 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
2115 bool ZeroMask,
bool IndexForm) {
2118 unsigned EltWidth = Ty->getScalarSizeInBits();
2119 bool IsFloat = Ty->isFPOrFPVectorTy();
2121 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2122 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2123 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2124 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2125 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2126 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2127 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2128 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2129 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2130 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2131 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2132 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2133 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2134 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2135 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2136 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2137 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2138 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2139 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2140 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2141 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2142 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2143 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2144 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2145 else if (VecWidth == 128 && EltWidth == 16)
2146 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2147 else if (VecWidth == 256 && EltWidth == 16)
2148 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2149 else if (VecWidth == 512 && EltWidth == 16)
2150 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2151 else if (VecWidth == 128 && EltWidth == 8)
2152 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2153 else if (VecWidth == 256 && EltWidth == 8)
2154 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2155 else if (VecWidth == 512 && EltWidth == 8)
2156 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2167 Value *V = Builder.CreateIntrinsic(IID, Args);
2179 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2190 bool IsRotateRight) {
2200 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2201 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2204 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2205 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2250 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2255 bool IsShiftRight,
bool ZeroMask) {
2269 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2270 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2273 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2274 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2289 const Align Alignment =
2291 ?
Align(
Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2296 if (
C->isAllOnesValue())
2297 return Builder.CreateAlignedStore(
Data, Ptr, Alignment);
2302 return Builder.CreateMaskedStore(
Data, Ptr, Alignment, Mask);
2308 const Align Alignment =
2317 if (
C->isAllOnesValue())
2318 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2323 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2329 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2330 {Op0, Builder.getInt1(
false)});
2345 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2346 LHS = Builder.CreateShl(
LHS, ShiftAmt);
2347 LHS = Builder.CreateAShr(
LHS, ShiftAmt);
2348 RHS = Builder.CreateShl(
RHS, ShiftAmt);
2349 RHS = Builder.CreateAShr(
RHS, ShiftAmt);
2352 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2353 LHS = Builder.CreateAnd(
LHS, Mask);
2354 RHS = Builder.CreateAnd(
RHS, Mask);
2371 if (!
C || !
C->isAllOnesValue())
2372 Vec = Builder.CreateAnd(Vec,
getX86MaskVec(Builder, Mask, NumElts));
2377 for (
unsigned i = 0; i != NumElts; ++i)
2379 for (
unsigned i = NumElts; i != 8; ++i)
2380 Indices[i] = NumElts + i % NumElts;
2381 Vec = Builder.CreateShuffleVector(Vec,
2385 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2389 unsigned CC,
bool Signed) {
2397 }
else if (CC == 7) {
2433 Value* AndNode = Builder.CreateAnd(Mask,
APInt(8, 1));
2434 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2436 Value* Extract2 = Builder.CreateExtractElement(Src, (
uint64_t)0);
2437 Value*
Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2446 return Builder.CreateSExt(Mask, ReturnOp,
"vpmovm2");
2452 Name = Name.substr(12);
2457 if (Name.starts_with(
"max.p")) {
2458 if (VecWidth == 128 && EltWidth == 32)
2459 IID = Intrinsic::x86_sse_max_ps;
2460 else if (VecWidth == 128 && EltWidth == 64)
2461 IID = Intrinsic::x86_sse2_max_pd;
2462 else if (VecWidth == 256 && EltWidth == 32)
2463 IID = Intrinsic::x86_avx_max_ps_256;
2464 else if (VecWidth == 256 && EltWidth == 64)
2465 IID = Intrinsic::x86_avx_max_pd_256;
2468 }
else if (Name.starts_with(
"min.p")) {
2469 if (VecWidth == 128 && EltWidth == 32)
2470 IID = Intrinsic::x86_sse_min_ps;
2471 else if (VecWidth == 128 && EltWidth == 64)
2472 IID = Intrinsic::x86_sse2_min_pd;
2473 else if (VecWidth == 256 && EltWidth == 32)
2474 IID = Intrinsic::x86_avx_min_ps_256;
2475 else if (VecWidth == 256 && EltWidth == 64)
2476 IID = Intrinsic::x86_avx_min_pd_256;
2479 }
else if (Name.starts_with(
"pshuf.b.")) {
2480 if (VecWidth == 128)
2481 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2482 else if (VecWidth == 256)
2483 IID = Intrinsic::x86_avx2_pshuf_b;
2484 else if (VecWidth == 512)
2485 IID = Intrinsic::x86_avx512_pshuf_b_512;
2488 }
else if (Name.starts_with(
"pmul.hr.sw.")) {
2489 if (VecWidth == 128)
2490 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2491 else if (VecWidth == 256)
2492 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2493 else if (VecWidth == 512)
2494 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2497 }
else if (Name.starts_with(
"pmulh.w.")) {
2498 if (VecWidth == 128)
2499 IID = Intrinsic::x86_sse2_pmulh_w;
2500 else if (VecWidth == 256)
2501 IID = Intrinsic::x86_avx2_pmulh_w;
2502 else if (VecWidth == 512)
2503 IID = Intrinsic::x86_avx512_pmulh_w_512;
2506 }
else if (Name.starts_with(
"pmulhu.w.")) {
2507 if (VecWidth == 128)
2508 IID = Intrinsic::x86_sse2_pmulhu_w;
2509 else if (VecWidth == 256)
2510 IID = Intrinsic::x86_avx2_pmulhu_w;
2511 else if (VecWidth == 512)
2512 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2515 }
else if (Name.starts_with(
"pmaddw.d.")) {
2516 if (VecWidth == 128)
2517 IID = Intrinsic::x86_sse2_pmadd_wd;
2518 else if (VecWidth == 256)
2519 IID = Intrinsic::x86_avx2_pmadd_wd;
2520 else if (VecWidth == 512)
2521 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2524 }
else if (Name.starts_with(
"pmaddubs.w.")) {
2525 if (VecWidth == 128)
2526 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2527 else if (VecWidth == 256)
2528 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2529 else if (VecWidth == 512)
2530 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2533 }
else if (Name.starts_with(
"packsswb.")) {
2534 if (VecWidth == 128)
2535 IID = Intrinsic::x86_sse2_packsswb_128;
2536 else if (VecWidth == 256)
2537 IID = Intrinsic::x86_avx2_packsswb;
2538 else if (VecWidth == 512)
2539 IID = Intrinsic::x86_avx512_packsswb_512;
2542 }
else if (Name.starts_with(
"packssdw.")) {
2543 if (VecWidth == 128)
2544 IID = Intrinsic::x86_sse2_packssdw_128;
2545 else if (VecWidth == 256)
2546 IID = Intrinsic::x86_avx2_packssdw;
2547 else if (VecWidth == 512)
2548 IID = Intrinsic::x86_avx512_packssdw_512;
2551 }
else if (Name.starts_with(
"packuswb.")) {
2552 if (VecWidth == 128)
2553 IID = Intrinsic::x86_sse2_packuswb_128;
2554 else if (VecWidth == 256)
2555 IID = Intrinsic::x86_avx2_packuswb;
2556 else if (VecWidth == 512)
2557 IID = Intrinsic::x86_avx512_packuswb_512;
2560 }
else if (Name.starts_with(
"packusdw.")) {
2561 if (VecWidth == 128)
2562 IID = Intrinsic::x86_sse41_packusdw;
2563 else if (VecWidth == 256)
2564 IID = Intrinsic::x86_avx2_packusdw;
2565 else if (VecWidth == 512)
2566 IID = Intrinsic::x86_avx512_packusdw_512;
2569 }
else if (Name.starts_with(
"vpermilvar.")) {
2570 if (VecWidth == 128 && EltWidth == 32)
2571 IID = Intrinsic::x86_avx_vpermilvar_ps;
2572 else if (VecWidth == 128 && EltWidth == 64)
2573 IID = Intrinsic::x86_avx_vpermilvar_pd;
2574 else if (VecWidth == 256 && EltWidth == 32)
2575 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2576 else if (VecWidth == 256 && EltWidth == 64)
2577 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2578 else if (VecWidth == 512 && EltWidth == 32)
2579 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2580 else if (VecWidth == 512 && EltWidth == 64)
2581 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2584 }
else if (Name ==
"cvtpd2dq.256") {
2585 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2586 }
else if (Name ==
"cvtpd2ps.256") {
2587 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2588 }
else if (Name ==
"cvttpd2dq.256") {
2589 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2590 }
else if (Name ==
"cvttps2dq.128") {
2591 IID = Intrinsic::x86_sse2_cvttps2dq;
2592 }
else if (Name ==
"cvttps2dq.256") {
2593 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2594 }
else if (Name.starts_with(
"permvar.")) {
2596 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2597 IID = Intrinsic::x86_avx2_permps;
2598 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2599 IID = Intrinsic::x86_avx2_permd;
2600 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2601 IID = Intrinsic::x86_avx512_permvar_df_256;
2602 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2603 IID = Intrinsic::x86_avx512_permvar_di_256;
2604 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2605 IID = Intrinsic::x86_avx512_permvar_sf_512;
2606 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2607 IID = Intrinsic::x86_avx512_permvar_si_512;
2608 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2609 IID = Intrinsic::x86_avx512_permvar_df_512;
2610 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2611 IID = Intrinsic::x86_avx512_permvar_di_512;
2612 else if (VecWidth == 128 && EltWidth == 16)
2613 IID = Intrinsic::x86_avx512_permvar_hi_128;
2614 else if (VecWidth == 256 && EltWidth == 16)
2615 IID = Intrinsic::x86_avx512_permvar_hi_256;
2616 else if (VecWidth == 512 && EltWidth == 16)
2617 IID = Intrinsic::x86_avx512_permvar_hi_512;
2618 else if (VecWidth == 128 && EltWidth == 8)
2619 IID = Intrinsic::x86_avx512_permvar_qi_128;
2620 else if (VecWidth == 256 && EltWidth == 8)
2621 IID = Intrinsic::x86_avx512_permvar_qi_256;
2622 else if (VecWidth == 512 && EltWidth == 8)
2623 IID = Intrinsic::x86_avx512_permvar_qi_512;
2626 }
else if (Name.starts_with(
"dbpsadbw.")) {
2627 if (VecWidth == 128)
2628 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2629 else if (VecWidth == 256)
2630 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2631 else if (VecWidth == 512)
2632 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2635 }
else if (Name.starts_with(
"pmultishift.qb.")) {
2636 if (VecWidth == 128)
2637 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2638 else if (VecWidth == 256)
2639 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2640 else if (VecWidth == 512)
2641 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2644 }
else if (Name.starts_with(
"conflict.")) {
2645 if (Name[9] ==
'd' && VecWidth == 128)
2646 IID = Intrinsic::x86_avx512_conflict_d_128;
2647 else if (Name[9] ==
'd' && VecWidth == 256)
2648 IID = Intrinsic::x86_avx512_conflict_d_256;
2649 else if (Name[9] ==
'd' && VecWidth == 512)
2650 IID = Intrinsic::x86_avx512_conflict_d_512;
2651 else if (Name[9] ==
'q' && VecWidth == 128)
2652 IID = Intrinsic::x86_avx512_conflict_q_128;
2653 else if (Name[9] ==
'q' && VecWidth == 256)
2654 IID = Intrinsic::x86_avx512_conflict_q_256;
2655 else if (Name[9] ==
'q' && VecWidth == 512)
2656 IID = Intrinsic::x86_avx512_conflict_q_512;
2659 }
else if (Name.starts_with(
"pavg.")) {
2660 if (Name[5] ==
'b' && VecWidth == 128)
2661 IID = Intrinsic::x86_sse2_pavg_b;
2662 else if (Name[5] ==
'b' && VecWidth == 256)
2663 IID = Intrinsic::x86_avx2_pavg_b;
2664 else if (Name[5] ==
'b' && VecWidth == 512)
2665 IID = Intrinsic::x86_avx512_pavg_b_512;
2666 else if (Name[5] ==
'w' && VecWidth == 128)
2667 IID = Intrinsic::x86_sse2_pavg_w;
2668 else if (Name[5] ==
'w' && VecWidth == 256)
2669 IID = Intrinsic::x86_avx2_pavg_w;
2670 else if (Name[5] ==
'w' && VecWidth == 512)
2671 IID = Intrinsic::x86_avx512_pavg_w_512;
2680 Rep = Builder.CreateIntrinsic(IID, Args);
2691 if (AsmStr->find(
"mov\tfp") == 0 &&
2692 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2693 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2694 AsmStr->replace(Pos, 1,
";");
2700 Value *Rep =
nullptr;
2702 if (Name ==
"abs.i" || Name ==
"abs.ll") {
2704 Rep = Builder.CreateIntrinsic(Intrinsic::abs, {Arg->
getType()},
2705 {Arg, Builder.getTrue()},
2707 }
else if (Name ==
"abs.bf16" || Name ==
"abs.bf16x2") {
2708 Type *Ty = (Name ==
"abs.bf16")
2712 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2713 Rep = Builder.CreateBitCast(Abs, CI->
getType());
2714 }
else if (Name ==
"fabs.f" || Name ==
"fabs.ftz.f" || Name ==
"fabs.d") {
2715 Intrinsic::ID IID = (Name ==
"fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2716 : Intrinsic::nvvm_fabs;
2717 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2718 }
else if (Name.consume_front(
"ex2.approx.")) {
2720 Intrinsic::ID IID = Name.starts_with(
"ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2721 : Intrinsic::nvvm_ex2_approx;
2722 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2723 }
else if (Name.starts_with(
"atomic.load.add.f32.p") ||
2724 Name.starts_with(
"atomic.load.add.f64.p")) {
2727 Rep = Builder.CreateAtomicRMW(
2733 }
else if (Name.starts_with(
"atomic.load.inc.32.p") ||
2734 Name.starts_with(
"atomic.load.dec.32.p")) {
2739 Rep = Builder.CreateAtomicRMW(
2743 }
else if (Name ==
"clz.ll") {
2746 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->
getType()},
2747 {Arg, Builder.getFalse()},
2749 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(),
"ctlz.trunc");
2750 }
else if (Name ==
"popc.ll") {
2754 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->
getType()},
2755 Arg,
nullptr,
"ctpop");
2756 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(),
"ctpop.trunc");
2757 }
else if (Name ==
"h2f") {
2759 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
2760 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2761 }
else if (Name.consume_front(
"bitcast.") &&
2762 (Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" ||
2765 }
else if (Name ==
"rotate.b32") {
2768 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2769 {Arg, Arg, ShiftAmt});
2770 }
else if (Name ==
"rotate.b64") {
2774 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2775 {Arg, Arg, ZExtShiftAmt});
2776 }
else if (Name ==
"rotate.right.b64") {
2780 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2781 {Arg, Arg, ZExtShiftAmt});
2782 }
else if (Name ==
"swap.lo.hi.b64") {
2785 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2786 {Arg, Arg, Builder.getInt64(32)});
2787 }
else if ((Name.consume_front(
"ptr.gen.to.") &&
2790 Name.starts_with(
".to.gen"))) {
2792 }
else if (Name.consume_front(
"ldg.global")) {
2796 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2799 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2801 }
else if (Name ==
"tanh.approx.f32") {
2805 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->
getArgOperand(0),
2807 }
else if (Name ==
"barrier0" || Name ==
"barrier.n" || Name ==
"bar.sync") {
2809 Name.ends_with(
'0') ? Builder.getInt32(0) : CI->
getArgOperand(0);
2810 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2812 }
else if (Name ==
"barrier") {
2813 Rep = Builder.CreateIntrinsic(
2814 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2816 }
else if (Name ==
"barrier.sync") {
2817 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2819 }
else if (Name ==
"barrier.sync.cnt") {
2820 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2822 }
else if (Name ==
"barrier0.popc" || Name ==
"barrier0.and" ||
2823 Name ==
"barrier0.or") {
2825 C = Builder.CreateICmpNE(
C, Builder.getInt32(0));
2829 .
Case(
"barrier0.popc",
2830 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2831 .
Case(
"barrier0.and",
2832 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2833 .
Case(
"barrier0.or",
2834 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2835 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0),
C});
2836 Rep = Builder.CreateZExt(Bar, CI->
getType());
2840 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2850 ? Builder.CreateBitCast(Arg, NewType)
2853 Rep = Builder.CreateCall(NewFn, Args);
2854 if (
F->getReturnType()->isIntegerTy())
2855 Rep = Builder.CreateBitCast(Rep,
F->getReturnType());
2865 Value *Rep =
nullptr;
2867 if (Name.starts_with(
"sse4a.movnt.")) {
2879 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2882 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2883 }
else if (Name.starts_with(
"avx.movnt.") ||
2884 Name.starts_with(
"avx512.storent.")) {
2896 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2897 }
else if (Name ==
"sse2.storel.dq") {
2902 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2903 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2904 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2905 }
else if (Name.starts_with(
"sse.storeu.") ||
2906 Name.starts_with(
"sse2.storeu.") ||
2907 Name.starts_with(
"avx.storeu.")) {
2910 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
2911 }
else if (Name ==
"avx512.mask.store.ss") {
2915 }
else if (Name.starts_with(
"avx512.mask.store")) {
2917 bool Aligned = Name[17] !=
'u';
2920 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
2923 bool CmpEq = Name[9] ==
'e';
2926 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
2927 }
else if (Name.starts_with(
"avx512.broadcastm")) {
2934 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2935 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
2937 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
2938 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
2939 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
2940 }
else if (Name.starts_with(
"avx.sqrt.p") ||
2941 Name.starts_with(
"sse2.sqrt.p") ||
2942 Name.starts_with(
"sse.sqrt.p")) {
2943 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2944 {CI->getArgOperand(0)});
2945 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
2949 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
2950 : Intrinsic::x86_avx512_sqrt_pd_512;
2953 Rep = Builder.CreateIntrinsic(IID, Args);
2955 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2956 {CI->getArgOperand(0)});
2960 }
else if (Name.starts_with(
"avx512.ptestm") ||
2961 Name.starts_with(
"avx512.ptestnm")) {
2965 Rep = Builder.CreateAnd(Op0, Op1);
2971 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2973 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
2976 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
2979 }
else if (Name.starts_with(
"avx512.kunpck")) {
2984 for (
unsigned i = 0; i != NumElts; ++i)
2993 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
2994 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2995 }
else if (Name ==
"avx512.kand.w") {
2998 Rep = Builder.CreateAnd(
LHS,
RHS);
2999 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3000 }
else if (Name ==
"avx512.kandn.w") {
3003 LHS = Builder.CreateNot(
LHS);
3004 Rep = Builder.CreateAnd(
LHS,
RHS);
3005 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3006 }
else if (Name ==
"avx512.kor.w") {
3009 Rep = Builder.CreateOr(
LHS,
RHS);
3010 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3011 }
else if (Name ==
"avx512.kxor.w") {
3014 Rep = Builder.CreateXor(
LHS,
RHS);
3015 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3016 }
else if (Name ==
"avx512.kxnor.w") {
3019 LHS = Builder.CreateNot(
LHS);
3020 Rep = Builder.CreateXor(
LHS,
RHS);
3021 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3022 }
else if (Name ==
"avx512.knot.w") {
3024 Rep = Builder.CreateNot(Rep);
3025 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3026 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
3029 Rep = Builder.CreateOr(
LHS,
RHS);
3030 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3032 if (Name[14] ==
'c')
3036 Rep = Builder.CreateICmpEQ(Rep,
C);
3037 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3038 }
else if (Name ==
"sse.add.ss" || Name ==
"sse2.add.sd" ||
3039 Name ==
"sse.sub.ss" || Name ==
"sse2.sub.sd" ||
3040 Name ==
"sse.mul.ss" || Name ==
"sse2.mul.sd" ||
3041 Name ==
"sse.div.ss" || Name ==
"sse2.div.sd") {
3044 ConstantInt::get(I32Ty, 0));
3046 ConstantInt::get(I32Ty, 0));
3048 if (Name.contains(
".add."))
3049 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3050 else if (Name.contains(
".sub."))
3051 EltOp = Builder.CreateFSub(Elt0, Elt1);
3052 else if (Name.contains(
".mul."))
3053 EltOp = Builder.CreateFMul(Elt0, Elt1);
3055 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3056 Rep = Builder.CreateInsertElement(CI->
getArgOperand(0), EltOp,
3057 ConstantInt::get(I32Ty, 0));
3058 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
3060 bool CmpEq = Name[16] ==
'e';
3062 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
3071 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3074 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3077 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3084 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
3089 if (VecWidth == 128 && EltWidth == 32)
3090 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3091 else if (VecWidth == 256 && EltWidth == 32)
3092 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3093 else if (VecWidth == 512 && EltWidth == 32)
3094 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3095 else if (VecWidth == 128 && EltWidth == 64)
3096 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3097 else if (VecWidth == 256 && EltWidth == 64)
3098 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3099 else if (VecWidth == 512 && EltWidth == 64)
3100 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3107 }
else if (Name.starts_with(
"avx512.cmp.p")) {
3109 Type *OpTy = Args[0]->getType();
3113 if (VecWidth == 128 && EltWidth == 32)
3114 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3115 else if (VecWidth == 256 && EltWidth == 32)
3116 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3117 else if (VecWidth == 512 && EltWidth == 32)
3118 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3119 else if (VecWidth == 128 && EltWidth == 64)
3120 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3121 else if (VecWidth == 256 && EltWidth == 64)
3122 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3123 else if (VecWidth == 512 && EltWidth == 64)
3124 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3129 if (VecWidth == 512)
3131 Args.push_back(Mask);
3133 Rep = Builder.CreateIntrinsic(IID, Args);
3134 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
3138 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
3141 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
3142 Name.starts_with(
"avx512.cvtw2mask.") ||
3143 Name.starts_with(
"avx512.cvtd2mask.") ||
3144 Name.starts_with(
"avx512.cvtq2mask.")) {
3149 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
3150 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
3151 Name.starts_with(
"avx512.mask.pabs")) {
3153 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
3154 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
3155 Name.starts_with(
"avx512.mask.pmaxs")) {
3157 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
3158 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
3159 Name.starts_with(
"avx512.mask.pmaxu")) {
3161 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
3162 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
3163 Name.starts_with(
"avx512.mask.pmins")) {
3165 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
3166 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
3167 Name.starts_with(
"avx512.mask.pminu")) {
3169 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
3170 Name ==
"avx512.pmulu.dq.512" ||
3171 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
3173 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
3174 Name ==
"avx512.pmul.dq.512" ||
3175 Name.starts_with(
"avx512.mask.pmul.dq.")) {
3177 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
3178 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
3183 }
else if (Name ==
"avx512.cvtusi2sd") {
3188 }
else if (Name ==
"sse2.cvtss2sd") {
3190 Rep = Builder.CreateFPExt(
3193 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
3194 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
3195 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
3196 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
3197 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
3198 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
3199 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
3200 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
3201 Name ==
"avx512.mask.cvtqq2ps.256" ||
3202 Name ==
"avx512.mask.cvtqq2ps.512" ||
3203 Name ==
"avx512.mask.cvtuqq2ps.256" ||
3204 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
3205 Name ==
"avx.cvt.ps2.pd.256" ||
3206 Name ==
"avx512.mask.cvtps2pd.128" ||
3207 Name ==
"avx512.mask.cvtps2pd.256") {
3212 unsigned NumDstElts = DstTy->getNumElements();
3214 assert(NumDstElts == 2 &&
"Unexpected vector size");
3215 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
3218 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3219 bool IsUnsigned = Name.contains(
"cvtu");
3221 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
3225 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3226 : Intrinsic::x86_avx512_sitofp_round;
3227 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3230 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
3231 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3237 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3238 Name.starts_with(
"vcvtph2ps.")) {
3242 unsigned NumDstElts = DstTy->getNumElements();
3243 if (NumDstElts != SrcTy->getNumElements()) {
3244 assert(NumDstElts == 4 &&
"Unexpected vector size");
3245 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3247 Rep = Builder.CreateBitCast(
3249 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3253 }
else if (Name.starts_with(
"avx512.mask.load")) {
3255 bool Aligned = Name[16] !=
'u';
3258 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3261 ResultTy->getNumElements());
3263 Rep = Builder.CreateIntrinsic(
3264 Intrinsic::masked_expandload, ResultTy,
3266 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3272 Rep = Builder.CreateIntrinsic(
3273 Intrinsic::masked_compressstore, ResultTy,
3275 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3276 Name.starts_with(
"avx512.mask.expand.")) {
3280 ResultTy->getNumElements());
3282 bool IsCompress = Name[12] ==
'c';
3283 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3284 : Intrinsic::x86_avx512_mask_expand;
3285 Rep = Builder.CreateIntrinsic(
3287 }
else if (Name.starts_with(
"xop.vpcom")) {
3289 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3290 Name.ends_with(
"uq"))
3292 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3293 Name.ends_with(
"d") || Name.ends_with(
"q"))
3302 Name = Name.substr(9);
3303 if (Name.starts_with(
"lt"))
3305 else if (Name.starts_with(
"le"))
3307 else if (Name.starts_with(
"gt"))
3309 else if (Name.starts_with(
"ge"))
3311 else if (Name.starts_with(
"eq"))
3313 else if (Name.starts_with(
"ne"))
3315 else if (Name.starts_with(
"false"))
3317 else if (Name.starts_with(
"true"))
3324 }
else if (Name.starts_with(
"xop.vpcmov")) {
3326 Value *NotSel = Builder.CreateNot(Sel);
3329 Rep = Builder.CreateOr(Sel0, Sel1);
3330 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3331 Name.starts_with(
"avx512.mask.prol")) {
3333 }
else if (Name.starts_with(
"avx512.pror") ||
3334 Name.starts_with(
"avx512.mask.pror")) {
3336 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3337 Name.starts_with(
"avx512.mask.vpshld") ||
3338 Name.starts_with(
"avx512.maskz.vpshld")) {
3339 bool ZeroMask = Name[11] ==
'z';
3341 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3342 Name.starts_with(
"avx512.mask.vpshrd") ||
3343 Name.starts_with(
"avx512.maskz.vpshrd")) {
3344 bool ZeroMask = Name[11] ==
'z';
3346 }
else if (Name ==
"sse42.crc32.64.8") {
3349 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3351 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3352 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3353 Name.starts_with(
"avx512.vbroadcast.s")) {
3356 Type *EltTy = VecTy->getElementType();
3357 unsigned EltNum = VecTy->getNumElements();
3361 for (
unsigned I = 0;
I < EltNum; ++
I)
3362 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3363 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3364 Name.starts_with(
"sse41.pmovzx") ||
3365 Name.starts_with(
"avx2.pmovsx") ||
3366 Name.starts_with(
"avx2.pmovzx") ||
3367 Name.starts_with(
"avx512.mask.pmovsx") ||
3368 Name.starts_with(
"avx512.mask.pmovzx")) {
3370 unsigned NumDstElts = DstTy->getNumElements();
3374 for (
unsigned i = 0; i != NumDstElts; ++i)
3379 bool DoSext = Name.contains(
"pmovsx");
3381 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3386 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3387 Name ==
"avx512.mask.pmov.qd.512" ||
3388 Name ==
"avx512.mask.pmov.wb.256" ||
3389 Name ==
"avx512.mask.pmov.wb.512") {
3394 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3395 Name ==
"avx2.vbroadcasti128") {
3401 if (NumSrcElts == 2)
3402 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3404 Rep = Builder.CreateShuffleVector(Load,
3406 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3407 Name.starts_with(
"avx512.mask.shuf.f")) {
3412 unsigned ControlBitsMask = NumLanes - 1;
3413 unsigned NumControlBits = NumLanes / 2;
3416 for (
unsigned l = 0; l != NumLanes; ++l) {
3417 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3419 if (l >= NumLanes / 2)
3420 LaneMask += NumLanes;
3421 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3422 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3428 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3429 Name.starts_with(
"avx512.mask.broadcasti")) {
3432 unsigned NumDstElts =
3436 for (
unsigned i = 0; i != NumDstElts; ++i)
3437 ShuffleMask[i] = i % NumSrcElts;
3443 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3444 Name.starts_with(
"avx2.vbroadcast") ||
3445 Name.starts_with(
"avx512.pbroadcast") ||
3446 Name.starts_with(
"avx512.mask.broadcast.s")) {
3453 Rep = Builder.CreateShuffleVector(
Op, M);
3458 }
else if (Name.starts_with(
"sse2.padds.") ||
3459 Name.starts_with(
"avx2.padds.") ||
3460 Name.starts_with(
"avx512.padds.") ||
3461 Name.starts_with(
"avx512.mask.padds.")) {
3463 }
else if (Name.starts_with(
"sse2.psubs.") ||
3464 Name.starts_with(
"avx2.psubs.") ||
3465 Name.starts_with(
"avx512.psubs.") ||
3466 Name.starts_with(
"avx512.mask.psubs.")) {
3468 }
else if (Name.starts_with(
"sse2.paddus.") ||
3469 Name.starts_with(
"avx2.paddus.") ||
3470 Name.starts_with(
"avx512.mask.paddus.")) {
3472 }
else if (Name.starts_with(
"sse2.psubus.") ||
3473 Name.starts_with(
"avx2.psubus.") ||
3474 Name.starts_with(
"avx512.mask.psubus.")) {
3476 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3481 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3485 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3490 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3495 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3496 Name ==
"avx512.psll.dq.512") {
3500 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3501 Name ==
"avx512.psrl.dq.512") {
3505 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3506 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3507 Name.starts_with(
"avx2.pblendd.")) {
3512 unsigned NumElts = VecTy->getNumElements();
3515 for (
unsigned i = 0; i != NumElts; ++i)
3516 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3518 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3519 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3520 Name ==
"avx2.vinserti128" ||
3521 Name.starts_with(
"avx512.mask.insert")) {
3525 unsigned DstNumElts =
3527 unsigned SrcNumElts =
3529 unsigned Scale = DstNumElts / SrcNumElts;
3536 for (
unsigned i = 0; i != SrcNumElts; ++i)
3538 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3539 Idxs[i] = SrcNumElts;
3540 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3554 for (
unsigned i = 0; i != DstNumElts; ++i)
3557 for (
unsigned i = 0; i != SrcNumElts; ++i)
3558 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3559 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3565 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3566 Name ==
"avx2.vextracti128" ||
3567 Name.starts_with(
"avx512.mask.vextract")) {
3570 unsigned DstNumElts =
3572 unsigned SrcNumElts =
3574 unsigned Scale = SrcNumElts / DstNumElts;
3581 for (
unsigned i = 0; i != DstNumElts; ++i) {
3582 Idxs[i] = i + (Imm * DstNumElts);
3584 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3590 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3591 Name.starts_with(
"avx512.mask.perm.di.")) {
3595 unsigned NumElts = VecTy->getNumElements();
3598 for (
unsigned i = 0; i != NumElts; ++i)
3599 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3601 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3606 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3618 unsigned HalfSize = NumElts / 2;
3630 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3631 for (
unsigned i = 0; i < HalfSize; ++i)
3632 ShuffleMask[i] = StartIndex + i;
3635 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3636 for (
unsigned i = 0; i < HalfSize; ++i)
3637 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3639 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3641 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3642 Name.starts_with(
"avx512.mask.vpermil.p") ||
3643 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3647 unsigned NumElts = VecTy->getNumElements();
3649 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3650 unsigned IdxMask = ((1 << IdxSize) - 1);
3656 for (
unsigned i = 0; i != NumElts; ++i)
3657 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3659 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3664 }
else if (Name ==
"sse2.pshufl.w" ||
3665 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3670 if (Name ==
"sse2.pshufl.w" && NumElts % 8 != 0)
3674 for (
unsigned l = 0; l != NumElts; l += 8) {
3675 for (
unsigned i = 0; i != 4; ++i)
3676 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3677 for (
unsigned i = 4; i != 8; ++i)
3678 Idxs[i + l] = i + l;
3681 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3686 }
else if (Name ==
"sse2.pshufh.w" ||
3687 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3692 if (Name ==
"sse2.pshufh.w" && NumElts % 8 != 0)
3696 for (
unsigned l = 0; l != NumElts; l += 8) {
3697 for (
unsigned i = 0; i != 4; ++i)
3698 Idxs[i + l] = i + l;
3699 for (
unsigned i = 0; i != 4; ++i)
3700 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3703 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3708 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3715 unsigned HalfLaneElts = NumLaneElts / 2;
3718 for (
unsigned i = 0; i != NumElts; ++i) {
3720 Idxs[i] = i - (i % NumLaneElts);
3722 if ((i % NumLaneElts) >= HalfLaneElts)
3726 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3729 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3733 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3734 Name.starts_with(
"avx512.mask.movshdup") ||
3735 Name.starts_with(
"avx512.mask.movsldup")) {
3741 if (Name.starts_with(
"avx512.mask.movshdup."))
3745 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3746 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3747 Idxs[i + l + 0] = i + l +
Offset;
3748 Idxs[i + l + 1] = i + l +
Offset;
3751 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3755 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3756 Name.starts_with(
"avx512.mask.unpckl.")) {
3763 for (
int l = 0; l != NumElts; l += NumLaneElts)
3764 for (
int i = 0; i != NumLaneElts; ++i)
3765 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3767 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3771 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3772 Name.starts_with(
"avx512.mask.unpckh.")) {
3779 for (
int l = 0; l != NumElts; l += NumLaneElts)
3780 for (
int i = 0; i != NumLaneElts; ++i)
3781 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3783 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3787 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3788 Name.starts_with(
"avx512.mask.pand.")) {
3791 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3793 Rep = Builder.CreateBitCast(Rep, FTy);
3796 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3797 Name.starts_with(
"avx512.mask.pandn.")) {
3800 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3801 Rep = Builder.CreateAnd(Rep,
3803 Rep = Builder.CreateBitCast(Rep, FTy);
3806 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3807 Name.starts_with(
"avx512.mask.por.")) {
3810 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3812 Rep = Builder.CreateBitCast(Rep, FTy);
3815 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3816 Name.starts_with(
"avx512.mask.pxor.")) {
3819 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3821 Rep = Builder.CreateBitCast(Rep, FTy);
3824 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3828 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3832 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3836 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3837 if (Name.ends_with(
".512")) {
3839 if (Name[17] ==
's')
3840 IID = Intrinsic::x86_avx512_add_ps_512;
3842 IID = Intrinsic::x86_avx512_add_pd_512;
3844 Rep = Builder.CreateIntrinsic(
3852 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3853 if (Name.ends_with(
".512")) {
3855 if (Name[17] ==
's')
3856 IID = Intrinsic::x86_avx512_div_ps_512;
3858 IID = Intrinsic::x86_avx512_div_pd_512;
3860 Rep = Builder.CreateIntrinsic(
3868 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3869 if (Name.ends_with(
".512")) {
3871 if (Name[17] ==
's')
3872 IID = Intrinsic::x86_avx512_mul_ps_512;
3874 IID = Intrinsic::x86_avx512_mul_pd_512;
3876 Rep = Builder.CreateIntrinsic(
3884 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3885 if (Name.ends_with(
".512")) {
3887 if (Name[17] ==
's')
3888 IID = Intrinsic::x86_avx512_sub_ps_512;
3890 IID = Intrinsic::x86_avx512_sub_pd_512;
3892 Rep = Builder.CreateIntrinsic(
3900 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3901 Name.starts_with(
"avx512.mask.min.p")) &&
3902 Name.drop_front(18) ==
".512") {
3903 bool IsDouble = Name[17] ==
'd';
3904 bool IsMin = Name[13] ==
'i';
3906 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3907 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3910 Rep = Builder.CreateIntrinsic(
3915 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
3917 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
3918 {CI->getArgOperand(0), Builder.getInt1(false)});
3921 }
else if (Name.starts_with(
"avx512.mask.psll")) {
3922 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3923 bool IsVariable = Name[16] ==
'v';
3924 char Size = Name[16] ==
'.' ? Name[17]
3925 : Name[17] ==
'.' ? Name[18]
3926 : Name[18] ==
'.' ? Name[19]
3930 if (IsVariable && Name[17] !=
'.') {
3931 if (
Size ==
'd' && Name[17] ==
'2')
3932 IID = Intrinsic::x86_avx2_psllv_q;
3933 else if (
Size ==
'd' && Name[17] ==
'4')
3934 IID = Intrinsic::x86_avx2_psllv_q_256;
3935 else if (
Size ==
's' && Name[17] ==
'4')
3936 IID = Intrinsic::x86_avx2_psllv_d;
3937 else if (
Size ==
's' && Name[17] ==
'8')
3938 IID = Intrinsic::x86_avx2_psllv_d_256;
3939 else if (
Size ==
'h' && Name[17] ==
'8')
3940 IID = Intrinsic::x86_avx512_psllv_w_128;
3941 else if (
Size ==
'h' && Name[17] ==
'1')
3942 IID = Intrinsic::x86_avx512_psllv_w_256;
3943 else if (Name[17] ==
'3' && Name[18] ==
'2')
3944 IID = Intrinsic::x86_avx512_psllv_w_512;
3947 }
else if (Name.ends_with(
".128")) {
3949 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3950 : Intrinsic::x86_sse2_psll_d;
3951 else if (
Size ==
'q')
3952 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3953 : Intrinsic::x86_sse2_psll_q;
3954 else if (
Size ==
'w')
3955 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3956 : Intrinsic::x86_sse2_psll_w;
3959 }
else if (Name.ends_with(
".256")) {
3961 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3962 : Intrinsic::x86_avx2_psll_d;
3963 else if (
Size ==
'q')
3964 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3965 : Intrinsic::x86_avx2_psll_q;
3966 else if (
Size ==
'w')
3967 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3968 : Intrinsic::x86_avx2_psll_w;
3973 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3974 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3975 : Intrinsic::x86_avx512_psll_d_512;
3976 else if (
Size ==
'q')
3977 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3978 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3979 : Intrinsic::x86_avx512_psll_q_512;
3980 else if (
Size ==
'w')
3981 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3982 : Intrinsic::x86_avx512_psll_w_512;
3988 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
3989 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3990 bool IsVariable = Name[16] ==
'v';
3991 char Size = Name[16] ==
'.' ? Name[17]
3992 : Name[17] ==
'.' ? Name[18]
3993 : Name[18] ==
'.' ? Name[19]
3997 if (IsVariable && Name[17] !=
'.') {
3998 if (
Size ==
'd' && Name[17] ==
'2')
3999 IID = Intrinsic::x86_avx2_psrlv_q;
4000 else if (
Size ==
'd' && Name[17] ==
'4')
4001 IID = Intrinsic::x86_avx2_psrlv_q_256;
4002 else if (
Size ==
's' && Name[17] ==
'4')
4003 IID = Intrinsic::x86_avx2_psrlv_d;
4004 else if (
Size ==
's' && Name[17] ==
'8')
4005 IID = Intrinsic::x86_avx2_psrlv_d_256;
4006 else if (
Size ==
'h' && Name[17] ==
'8')
4007 IID = Intrinsic::x86_avx512_psrlv_w_128;
4008 else if (
Size ==
'h' && Name[17] ==
'1')
4009 IID = Intrinsic::x86_avx512_psrlv_w_256;
4010 else if (Name[17] ==
'3' && Name[18] ==
'2')
4011 IID = Intrinsic::x86_avx512_psrlv_w_512;
4014 }
else if (Name.ends_with(
".128")) {
4016 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4017 : Intrinsic::x86_sse2_psrl_d;
4018 else if (
Size ==
'q')
4019 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4020 : Intrinsic::x86_sse2_psrl_q;
4021 else if (
Size ==
'w')
4022 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4023 : Intrinsic::x86_sse2_psrl_w;
4026 }
else if (Name.ends_with(
".256")) {
4028 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4029 : Intrinsic::x86_avx2_psrl_d;
4030 else if (
Size ==
'q')
4031 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4032 : Intrinsic::x86_avx2_psrl_q;
4033 else if (
Size ==
'w')
4034 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4035 : Intrinsic::x86_avx2_psrl_w;
4040 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4041 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4042 : Intrinsic::x86_avx512_psrl_d_512;
4043 else if (
Size ==
'q')
4044 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4045 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4046 : Intrinsic::x86_avx512_psrl_q_512;
4047 else if (
Size ==
'w')
4048 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4049 : Intrinsic::x86_avx512_psrl_w_512;
4055 }
else if (Name.starts_with(
"avx512.mask.psra")) {
4056 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4057 bool IsVariable = Name[16] ==
'v';
4058 char Size = Name[16] ==
'.' ? Name[17]
4059 : Name[17] ==
'.' ? Name[18]
4060 : Name[18] ==
'.' ? Name[19]
4064 if (IsVariable && Name[17] !=
'.') {
4065 if (
Size ==
's' && Name[17] ==
'4')
4066 IID = Intrinsic::x86_avx2_psrav_d;
4067 else if (
Size ==
's' && Name[17] ==
'8')
4068 IID = Intrinsic::x86_avx2_psrav_d_256;
4069 else if (
Size ==
'h' && Name[17] ==
'8')
4070 IID = Intrinsic::x86_avx512_psrav_w_128;
4071 else if (
Size ==
'h' && Name[17] ==
'1')
4072 IID = Intrinsic::x86_avx512_psrav_w_256;
4073 else if (Name[17] ==
'3' && Name[18] ==
'2')
4074 IID = Intrinsic::x86_avx512_psrav_w_512;
4077 }
else if (Name.ends_with(
".128")) {
4079 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4080 : Intrinsic::x86_sse2_psra_d;
4081 else if (
Size ==
'q')
4082 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4083 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4084 : Intrinsic::x86_avx512_psra_q_128;
4085 else if (
Size ==
'w')
4086 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4087 : Intrinsic::x86_sse2_psra_w;
4090 }
else if (Name.ends_with(
".256")) {
4092 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4093 : Intrinsic::x86_avx2_psra_d;
4094 else if (
Size ==
'q')
4095 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4096 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4097 : Intrinsic::x86_avx512_psra_q_256;
4098 else if (
Size ==
'w')
4099 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4100 : Intrinsic::x86_avx2_psra_w;
4105 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4106 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4107 : Intrinsic::x86_avx512_psra_d_512;
4108 else if (
Size ==
'q')
4109 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4110 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4111 : Intrinsic::x86_avx512_psra_q_512;
4112 else if (
Size ==
'w')
4113 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4114 : Intrinsic::x86_avx512_psra_w_512;
4120 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
4122 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
4124 }
else if (Name.ends_with(
".movntdqa")) {
4128 LoadInst *LI = Builder.CreateAlignedLoad(
4133 }
else if (Name.starts_with(
"fma.vfmadd.") ||
4134 Name.starts_with(
"fma.vfmsub.") ||
4135 Name.starts_with(
"fma.vfnmadd.") ||
4136 Name.starts_with(
"fma.vfnmsub.")) {
4137 bool NegMul = Name[6] ==
'n';
4138 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
4139 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
4150 if (NegMul && !IsScalar)
4151 Ops[0] = Builder.CreateFNeg(
Ops[0]);
4152 if (NegMul && IsScalar)
4153 Ops[1] = Builder.CreateFNeg(
Ops[1]);
4155 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4157 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4161 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
4169 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4173 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
4174 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
4175 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
4176 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
4177 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
4178 bool IsMask3 = Name[11] ==
'3';
4179 bool IsMaskZ = Name[11] ==
'z';
4181 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4182 bool NegMul = Name[2] ==
'n';
4183 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4189 if (NegMul && (IsMask3 || IsMaskZ))
4190 A = Builder.CreateFNeg(
A);
4191 if (NegMul && !(IsMask3 || IsMaskZ))
4192 B = Builder.CreateFNeg(
B);
4194 C = Builder.CreateFNeg(
C);
4196 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
4197 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
4198 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
4205 if (Name.back() ==
'd')
4206 IID = Intrinsic::x86_avx512_vfmadd_f64;
4208 IID = Intrinsic::x86_avx512_vfmadd_f32;
4209 Rep = Builder.CreateIntrinsic(IID,
Ops);
4211 Rep = Builder.CreateFMA(
A,
B,
C);
4220 if (NegAcc && IsMask3)
4225 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
4227 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
4228 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
4229 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
4230 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
4231 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
4232 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
4233 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
4234 bool IsMask3 = Name[11] ==
'3';
4235 bool IsMaskZ = Name[11] ==
'z';
4237 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4238 bool NegMul = Name[2] ==
'n';
4239 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4245 if (NegMul && (IsMask3 || IsMaskZ))
4246 A = Builder.CreateFNeg(
A);
4247 if (NegMul && !(IsMask3 || IsMaskZ))
4248 B = Builder.CreateFNeg(
B);
4250 C = Builder.CreateFNeg(
C);
4257 if (Name[Name.size() - 5] ==
's')
4258 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4260 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4264 Rep = Builder.CreateFMA(
A,
B,
C);
4272 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4276 if (VecWidth == 128 && EltWidth == 32)
4277 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4278 else if (VecWidth == 256 && EltWidth == 32)
4279 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4280 else if (VecWidth == 128 && EltWidth == 64)
4281 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4282 else if (VecWidth == 256 && EltWidth == 64)
4283 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4289 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4290 Rep = Builder.CreateIntrinsic(IID,
Ops);
4291 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4292 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4293 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4294 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4295 bool IsMask3 = Name[11] ==
'3';
4296 bool IsMaskZ = Name[11] ==
'z';
4298 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4299 bool IsSubAdd = Name[3] ==
's';
4303 if (Name[Name.size() - 5] ==
's')
4304 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4306 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4311 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4313 Rep = Builder.CreateIntrinsic(IID,
Ops);
4322 Value *Odd = Builder.CreateCall(FMA,
Ops);
4323 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4324 Value *Even = Builder.CreateCall(FMA,
Ops);
4330 for (
int i = 0; i != NumElts; ++i)
4331 Idxs[i] = i + (i % 2) * NumElts;
4333 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4341 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4342 Name.starts_with(
"avx512.maskz.pternlog.")) {
4343 bool ZeroMask = Name[11] ==
'z';
4347 if (VecWidth == 128 && EltWidth == 32)
4348 IID = Intrinsic::x86_avx512_pternlog_d_128;
4349 else if (VecWidth == 256 && EltWidth == 32)
4350 IID = Intrinsic::x86_avx512_pternlog_d_256;
4351 else if (VecWidth == 512 && EltWidth == 32)
4352 IID = Intrinsic::x86_avx512_pternlog_d_512;
4353 else if (VecWidth == 128 && EltWidth == 64)
4354 IID = Intrinsic::x86_avx512_pternlog_q_128;
4355 else if (VecWidth == 256 && EltWidth == 64)
4356 IID = Intrinsic::x86_avx512_pternlog_q_256;
4357 else if (VecWidth == 512 && EltWidth == 64)
4358 IID = Intrinsic::x86_avx512_pternlog_q_512;
4364 Rep = Builder.CreateIntrinsic(IID, Args);
4368 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4369 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4370 bool ZeroMask = Name[11] ==
'z';
4371 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4374 if (VecWidth == 128 && !
High)
4375 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4376 else if (VecWidth == 256 && !
High)
4377 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4378 else if (VecWidth == 512 && !
High)
4379 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4380 else if (VecWidth == 128 &&
High)
4381 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4382 else if (VecWidth == 256 &&
High)
4383 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4384 else if (VecWidth == 512 &&
High)
4385 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4391 Rep = Builder.CreateIntrinsic(IID, Args);
4395 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4396 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4397 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4398 bool ZeroMask = Name[11] ==
'z';
4399 bool IndexForm = Name[17] ==
'i';
4401 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4402 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4403 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4404 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4405 bool ZeroMask = Name[11] ==
'z';
4406 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4409 if (VecWidth == 128 && !IsSaturating)
4410 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4411 else if (VecWidth == 256 && !IsSaturating)
4412 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4413 else if (VecWidth == 512 && !IsSaturating)
4414 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4415 else if (VecWidth == 128 && IsSaturating)
4416 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4417 else if (VecWidth == 256 && IsSaturating)
4418 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4419 else if (VecWidth == 512 && IsSaturating)
4420 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4430 if (Args[1]->
getType()->isVectorTy() &&
4433 ->isIntegerTy(32) &&
4434 Args[2]->
getType()->isVectorTy() &&
4437 ->isIntegerTy(32)) {
4438 Type *NewArgType =
nullptr;
4439 if (VecWidth == 128)
4441 else if (VecWidth == 256)
4443 else if (VecWidth == 512)
4449 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4450 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4453 Rep = Builder.CreateIntrinsic(IID, Args);
4457 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4458 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4459 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4460 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4461 bool ZeroMask = Name[11] ==
'z';
4462 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4465 if (VecWidth == 128 && !IsSaturating)
4466 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4467 else if (VecWidth == 256 && !IsSaturating)
4468 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4469 else if (VecWidth == 512 && !IsSaturating)
4470 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4471 else if (VecWidth == 128 && IsSaturating)
4472 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4473 else if (VecWidth == 256 && IsSaturating)
4474 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4475 else if (VecWidth == 512 && IsSaturating)
4476 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4486 if (Args[1]->
getType()->isVectorTy() &&
4489 ->isIntegerTy(32) &&
4490 Args[2]->
getType()->isVectorTy() &&
4493 ->isIntegerTy(32)) {
4494 Type *NewArgType =
nullptr;
4495 if (VecWidth == 128)
4497 else if (VecWidth == 256)
4499 else if (VecWidth == 512)
4505 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4506 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4509 Rep = Builder.CreateIntrinsic(IID, Args);
4513 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4514 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4515 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4517 if (Name[0] ==
'a' && Name.back() ==
'2')
4518 IID = Intrinsic::x86_addcarry_32;
4519 else if (Name[0] ==
'a' && Name.back() ==
'4')
4520 IID = Intrinsic::x86_addcarry_64;
4521 else if (Name[0] ==
's' && Name.back() ==
'2')
4522 IID = Intrinsic::x86_subborrow_32;
4523 else if (Name[0] ==
's' && Name.back() ==
'4')
4524 IID = Intrinsic::x86_subborrow_64;
4531 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4534 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4537 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4541 }
else if (Name.starts_with(
"avx512.mask.") &&
4552 if (Name.starts_with(
"neon.bfcvt")) {
4553 if (Name.starts_with(
"neon.bfcvtn2")) {
4555 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4557 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4558 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4561 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4562 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4564 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4568 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4569 return Builder.CreateShuffleVector(
4572 return Builder.CreateFPTrunc(CI->
getOperand(0),
4575 }
else if (Name.starts_with(
"sve.fcvt")) {
4578 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4579 .
Case(
"sve.fcvtnt.bf16f32",
4580 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4592 if (Args[1]->
getType() != BadPredTy)
4595 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4596 BadPredTy, Args[1]);
4597 Args[1] = Builder.CreateIntrinsic(
4598 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4600 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4609 if (Name ==
"mve.vctp64.old") {
4612 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4615 Value *C1 = Builder.CreateIntrinsic(
4616 Intrinsic::arm_mve_pred_v2i,
4618 return Builder.CreateIntrinsic(
4619 Intrinsic::arm_mve_pred_i2v,
4621 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4622 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4623 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4624 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4626 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4627 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4628 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4629 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4631 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4632 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4633 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4634 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4635 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4636 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4637 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4638 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4639 std::vector<Type *> Tys;
4643 case Intrinsic::arm_mve_mull_int_predicated:
4644 case Intrinsic::arm_mve_vqdmull_predicated:
4645 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4648 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4649 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4650 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4654 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4658 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4662 case Intrinsic::arm_cde_vcx1q_predicated:
4663 case Intrinsic::arm_cde_vcx1qa_predicated:
4664 case Intrinsic::arm_cde_vcx2q_predicated:
4665 case Intrinsic::arm_cde_vcx2qa_predicated:
4666 case Intrinsic::arm_cde_vcx3q_predicated:
4667 case Intrinsic::arm_cde_vcx3qa_predicated:
4674 std::vector<Value *>
Ops;
4676 Type *Ty =
Op->getType();
4677 if (Ty->getScalarSizeInBits() == 1) {
4678 Value *C1 = Builder.CreateIntrinsic(
4679 Intrinsic::arm_mve_pred_v2i,
4681 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4686 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4701 auto UpgradeLegacyWMMAIUIntrinsicCall =
4706 Args.push_back(Builder.getFalse());
4710 F->getParent(),
F->getIntrinsicID(), OverloadTys);
4717 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4722 NewCall->copyMetadata(*CI);
4726 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4727 assert(CI->
arg_size() == 7 &&
"Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4728 "intrinsic should have 7 arguments");
4731 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2});
4733 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4734 assert(CI->
arg_size() == 8 &&
"Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4735 "intrinsic should have 8 arguments");
4740 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2, T3, T4});
4743 switch (
F->getIntrinsicID()) {
4746 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4747 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4748 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4749 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4750 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4751 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4766 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4769 F->getParent(),
F->getIntrinsicID(), Overloads);
4774 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4779 NewCall->copyMetadata(*CI);
4780 NewCall->takeName(CI);
4802 if (NumOperands < 3)
4815 bool IsVolatile =
false;
4819 if (NumOperands > 3)
4824 if (NumOperands > 5) {
4826 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4840 if (VT->getElementType()->isIntegerTy(16)) {
4843 Val = Builder.CreateBitCast(Val, AsBF16);
4851 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4853 unsigned AddrSpace = PtrTy->getAddressSpace();
4856 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4858 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4863 MDNode *RangeNotPrivate =
4866 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4872 return Builder.CreateBitCast(RMW, RetTy);
4893 return MAV->getMetadata();
4900 return I->getDebugLoc().getAsMDNode();
4908 if (Name ==
"label") {
4911 }
else if (Name ==
"assign") {
4918 }
else if (Name ==
"declare") {
4923 }
else if (Name ==
"addr") {
4933 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr,
4935 }
else if (Name ==
"value") {
4938 unsigned ExprOp = 2;
4952 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
4960 int64_t OffsetVal =
Offset->getSExtValue();
4961 return Builder.CreateIntrinsic(OffsetVal >= 0
4962 ? Intrinsic::vector_splice_left
4963 : Intrinsic::vector_splice_right,
4965 {CI->getArgOperand(0), CI->getArgOperand(1),
4966 Builder.getInt32(std::abs(OffsetVal))});
4971 if (Name.starts_with(
"to.fp16")) {
4973 Builder.CreateFPTrunc(CI->
getArgOperand(0), Builder.getHalfTy());
4974 return Builder.CreateBitCast(Cast, CI->
getType());
4977 if (Name.starts_with(
"from.fp16")) {
4979 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
4980 return Builder.CreateFPExt(Cast, CI->
getType());
5005 if (!Name.consume_front(
"llvm."))
5008 bool IsX86 = Name.consume_front(
"x86.");
5009 bool IsNVVM = Name.consume_front(
"nvvm.");
5010 bool IsAArch64 = Name.consume_front(
"aarch64.");
5011 bool IsARM = Name.consume_front(
"arm.");
5012 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
5013 bool IsDbg = Name.consume_front(
"dbg.");
5015 (Name.consume_front(
"experimental.vector.splice") ||
5016 Name.consume_front(
"vector.splice")) &&
5017 !(Name.starts_with(
".left") || Name.starts_with(
".right"));
5018 Value *Rep =
nullptr;
5020 if (!IsX86 && Name ==
"stackprotectorcheck") {
5022 }
else if (IsNVVM) {
5026 }
else if (IsAArch64) {
5030 }
else if (IsAMDGCN) {
5034 }
else if (IsOldSplice) {
5036 }
else if (Name.consume_front(
"convert.")) {
5048 const auto &DefaultCase = [&]() ->
void {
5056 "Unknown function for CallBase upgrade and isn't just a name change");
5064 "Return type must have changed");
5065 assert(OldST->getNumElements() ==
5067 "Must have same number of elements");
5070 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5073 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5074 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5075 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5094 case Intrinsic::arm_neon_vst1:
5095 case Intrinsic::arm_neon_vst2:
5096 case Intrinsic::arm_neon_vst3:
5097 case Intrinsic::arm_neon_vst4:
5098 case Intrinsic::arm_neon_vst2lane:
5099 case Intrinsic::arm_neon_vst3lane:
5100 case Intrinsic::arm_neon_vst4lane: {
5102 NewCall = Builder.CreateCall(NewFn, Args);
5105 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5106 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5107 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5112 NewCall = Builder.CreateCall(NewFn, Args);
5115 case Intrinsic::aarch64_sve_ld3_sret:
5116 case Intrinsic::aarch64_sve_ld4_sret:
5117 case Intrinsic::aarch64_sve_ld2_sret: {
5125 Name = Name.substr(5);
5132 unsigned MinElts = RetTy->getMinNumElements() /
N;
5134 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5136 for (
unsigned I = 0;
I <
N;
I++) {
5137 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
5138 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
5144 case Intrinsic::coro_end: {
5147 NewCall = Builder.CreateCall(NewFn, Args);
5151 case Intrinsic::vector_extract: {
5153 Name = Name.substr(5);
5154 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
5159 unsigned MinElts = RetTy->getMinNumElements();
5162 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
5166 case Intrinsic::vector_insert: {
5168 Name = Name.substr(5);
5169 if (!Name.starts_with(
"aarch64.sve.tuple")) {
5173 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
5178 NewCall = Builder.CreateCall(
5182 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
5188 assert(
N > 1 &&
"Create is expected to be between 2-4");
5191 unsigned MinElts = RetTy->getMinNumElements() /
N;
5192 for (
unsigned I = 0;
I <
N;
I++) {
5194 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
5201 case Intrinsic::arm_neon_bfdot:
5202 case Intrinsic::arm_neon_bfmmla:
5203 case Intrinsic::arm_neon_bfmlalb:
5204 case Intrinsic::arm_neon_bfmlalt:
5205 case Intrinsic::aarch64_neon_bfdot:
5206 case Intrinsic::aarch64_neon_bfmmla:
5207 case Intrinsic::aarch64_neon_bfmlalb:
5208 case Intrinsic::aarch64_neon_bfmlalt: {
5211 "Mismatch between function args and call args");
5212 size_t OperandWidth =
5214 assert((OperandWidth == 64 || OperandWidth == 128) &&
5215 "Unexpected operand width");
5217 auto Iter = CI->
args().begin();
5218 Args.push_back(*Iter++);
5219 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5220 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5221 NewCall = Builder.CreateCall(NewFn, Args);
5225 case Intrinsic::bitreverse:
5226 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5229 case Intrinsic::ctlz:
5230 case Intrinsic::cttz: {
5237 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
5241 case Intrinsic::objectsize: {
5242 Value *NullIsUnknownSize =
5246 NewCall = Builder.CreateCall(
5251 case Intrinsic::ctpop:
5252 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5254 case Intrinsic::dbg_value: {
5256 Name = Name.substr(5);
5258 if (Name.starts_with(
"dbg.addr")) {
5272 if (
Offset->isNullValue()) {
5273 NewCall = Builder.CreateCall(
5282 case Intrinsic::ptr_annotation:
5290 NewCall = Builder.CreateCall(
5299 case Intrinsic::var_annotation:
5306 NewCall = Builder.CreateCall(
5315 case Intrinsic::riscv_aes32dsi:
5316 case Intrinsic::riscv_aes32dsmi:
5317 case Intrinsic::riscv_aes32esi:
5318 case Intrinsic::riscv_aes32esmi:
5319 case Intrinsic::riscv_sm4ks:
5320 case Intrinsic::riscv_sm4ed: {
5330 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5331 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5337 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5338 Value *Res = NewCall;
5340 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5346 case Intrinsic::nvvm_mapa_shared_cluster: {
5350 Value *Res = NewCall;
5351 Res = Builder.CreateAddrSpaceCast(
5358 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5359 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5362 Args[0] = Builder.CreateAddrSpaceCast(
5365 NewCall = Builder.CreateCall(NewFn, Args);
5371 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5372 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5373 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5374 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5375 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5376 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5377 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5378 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5385 Args[0] = Builder.CreateAddrSpaceCast(
5394 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5396 NewCall = Builder.CreateCall(NewFn, Args);
5402 case Intrinsic::riscv_sha256sig0:
5403 case Intrinsic::riscv_sha256sig1:
5404 case Intrinsic::riscv_sha256sum0:
5405 case Intrinsic::riscv_sha256sum1:
5406 case Intrinsic::riscv_sm3p0:
5407 case Intrinsic::riscv_sm3p1: {
5414 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5416 NewCall = Builder.CreateCall(NewFn, Arg);
5418 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5425 case Intrinsic::x86_xop_vfrcz_ss:
5426 case Intrinsic::x86_xop_vfrcz_sd:
5427 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5430 case Intrinsic::x86_xop_vpermil2pd:
5431 case Intrinsic::x86_xop_vpermil2ps:
5432 case Intrinsic::x86_xop_vpermil2pd_256:
5433 case Intrinsic::x86_xop_vpermil2ps_256: {
5437 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5438 NewCall = Builder.CreateCall(NewFn, Args);
5442 case Intrinsic::x86_sse41_ptestc:
5443 case Intrinsic::x86_sse41_ptestz:
5444 case Intrinsic::x86_sse41_ptestnzc: {
5458 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5459 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5461 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5465 case Intrinsic::x86_rdtscp: {
5471 NewCall = Builder.CreateCall(NewFn);
5473 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5476 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5484 case Intrinsic::x86_sse41_insertps:
5485 case Intrinsic::x86_sse41_dppd:
5486 case Intrinsic::x86_sse41_dpps:
5487 case Intrinsic::x86_sse41_mpsadbw:
5488 case Intrinsic::x86_avx_dp_ps_256:
5489 case Intrinsic::x86_avx2_mpsadbw: {
5495 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5496 NewCall = Builder.CreateCall(NewFn, Args);
5500 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5501 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5502 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5503 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5504 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5505 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5511 NewCall = Builder.CreateCall(NewFn, Args);
5520 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5521 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5522 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5523 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5524 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5525 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5529 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5530 Args[1] = Builder.CreateBitCast(
5533 NewCall = Builder.CreateCall(NewFn, Args);
5534 Value *Res = Builder.CreateBitCast(
5542 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5543 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5544 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5548 Args[1] = Builder.CreateBitCast(
5550 Args[2] = Builder.CreateBitCast(
5553 NewCall = Builder.CreateCall(NewFn, Args);
5557 case Intrinsic::thread_pointer: {
5558 NewCall = Builder.CreateCall(NewFn, {});
5562 case Intrinsic::memcpy:
5563 case Intrinsic::memmove:
5564 case Intrinsic::memset: {
5580 NewCall = Builder.CreateCall(NewFn, Args);
5582 AttributeList NewAttrs = AttributeList::get(
5583 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5584 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5585 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5590 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5593 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5597 case Intrinsic::masked_load:
5598 case Intrinsic::masked_gather:
5599 case Intrinsic::masked_store:
5600 case Intrinsic::masked_scatter: {
5606 auto GetMaybeAlign = [](
Value *
Op) {
5616 auto GetAlign = [&](
Value *
Op) {
5625 case Intrinsic::masked_load:
5626 NewCall = Builder.CreateMaskedLoad(
5630 case Intrinsic::masked_gather:
5631 NewCall = Builder.CreateMaskedGather(
5637 case Intrinsic::masked_store:
5638 NewCall = Builder.CreateMaskedStore(
5642 case Intrinsic::masked_scatter:
5643 NewCall = Builder.CreateMaskedScatter(
5645 DL.getValueOrABITypeAlignment(
5659 case Intrinsic::lifetime_start:
5660 case Intrinsic::lifetime_end: {
5672 NewCall = Builder.CreateLifetimeStart(Ptr);
5674 NewCall = Builder.CreateLifetimeEnd(Ptr);
5683 case Intrinsic::x86_avx512_vpdpbusd_128:
5684 case Intrinsic::x86_avx512_vpdpbusd_256:
5685 case Intrinsic::x86_avx512_vpdpbusd_512:
5686 case Intrinsic::x86_avx512_vpdpbusds_128:
5687 case Intrinsic::x86_avx512_vpdpbusds_256:
5688 case Intrinsic::x86_avx512_vpdpbusds_512:
5689 case Intrinsic::x86_avx2_vpdpbssd_128:
5690 case Intrinsic::x86_avx2_vpdpbssd_256:
5691 case Intrinsic::x86_avx10_vpdpbssd_512:
5692 case Intrinsic::x86_avx2_vpdpbssds_128:
5693 case Intrinsic::x86_avx2_vpdpbssds_256:
5694 case Intrinsic::x86_avx10_vpdpbssds_512:
5695 case Intrinsic::x86_avx2_vpdpbsud_128:
5696 case Intrinsic::x86_avx2_vpdpbsud_256:
5697 case Intrinsic::x86_avx10_vpdpbsud_512:
5698 case Intrinsic::x86_avx2_vpdpbsuds_128:
5699 case Intrinsic::x86_avx2_vpdpbsuds_256:
5700 case Intrinsic::x86_avx10_vpdpbsuds_512:
5701 case Intrinsic::x86_avx2_vpdpbuud_128:
5702 case Intrinsic::x86_avx2_vpdpbuud_256:
5703 case Intrinsic::x86_avx10_vpdpbuud_512:
5704 case Intrinsic::x86_avx2_vpdpbuuds_128:
5705 case Intrinsic::x86_avx2_vpdpbuuds_256:
5706 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5711 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5712 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5714 NewCall = Builder.CreateCall(NewFn, Args);
5717 case Intrinsic::x86_avx512_vpdpwssd_128:
5718 case Intrinsic::x86_avx512_vpdpwssd_256:
5719 case Intrinsic::x86_avx512_vpdpwssd_512:
5720 case Intrinsic::x86_avx512_vpdpwssds_128:
5721 case Intrinsic::x86_avx512_vpdpwssds_256:
5722 case Intrinsic::x86_avx512_vpdpwssds_512:
5723 case Intrinsic::x86_avx2_vpdpwsud_128:
5724 case Intrinsic::x86_avx2_vpdpwsud_256:
5725 case Intrinsic::x86_avx10_vpdpwsud_512:
5726 case Intrinsic::x86_avx2_vpdpwsuds_128:
5727 case Intrinsic::x86_avx2_vpdpwsuds_256:
5728 case Intrinsic::x86_avx10_vpdpwsuds_512:
5729 case Intrinsic::x86_avx2_vpdpwusd_128:
5730 case Intrinsic::x86_avx2_vpdpwusd_256:
5731 case Intrinsic::x86_avx10_vpdpwusd_512:
5732 case Intrinsic::x86_avx2_vpdpwusds_128:
5733 case Intrinsic::x86_avx2_vpdpwusds_256:
5734 case Intrinsic::x86_avx10_vpdpwusds_512:
5735 case Intrinsic::x86_avx2_vpdpwuud_128:
5736 case Intrinsic::x86_avx2_vpdpwuud_256:
5737 case Intrinsic::x86_avx10_vpdpwuud_512:
5738 case Intrinsic::x86_avx2_vpdpwuuds_128:
5739 case Intrinsic::x86_avx2_vpdpwuuds_256:
5740 case Intrinsic::x86_avx10_vpdpwuuds_512:
5745 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5746 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5748 NewCall = Builder.CreateCall(NewFn, Args);
5751 assert(NewCall &&
"Should have either set this variable or returned through "
5752 "the default case");
5759 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5773 F->eraseFromParent();
5779 if (NumOperands == 0)
5787 if (NumOperands == 3) {
5791 Metadata *Elts2[] = {ScalarType, ScalarType,
5805 if (
Opc != Instruction::BitCast)
5809 Type *SrcTy = V->getType();
5826 if (
Opc != Instruction::BitCast)
5829 Type *SrcTy =
C->getType();
5856 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5857 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5858 if (Flag->getNumOperands() < 3)
5860 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5861 return K->getString() ==
"Debug Info Version";
5864 if (OpIt != ModFlags->op_end()) {
5865 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5872 bool BrokenDebugInfo =
false;
5875 if (!BrokenDebugInfo)
5881 M.getContext().diagnose(Diag);
5888 M.getContext().diagnose(DiagVersion);
5898 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5901 if (
F->hasFnAttribute(Attr)) {
5904 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5906 auto [Part, Rest] = S.
split(
',');
5912 const unsigned Dim = DimC -
'x';
5913 assert(Dim < 3 &&
"Unexpected dim char");
5923 F->addFnAttr(Attr, NewAttr);
5927 return S ==
"x" || S ==
"y" || S ==
"z";
5932 if (K ==
"kernel") {
5944 const unsigned Idx = (AlignIdxValuePair >> 16);
5945 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5950 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5955 if (K ==
"minctasm") {
5960 if (K ==
"maxnreg") {
5965 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
5969 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
5973 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
5977 if (K ==
"grid_constant") {
5992 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
5999 if (!SeenNodes.
insert(MD).second)
6006 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
6013 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6015 const MDOperand &V = MD->getOperand(j + 1);
6018 NewOperands.
append({K, V});
6021 if (NewOperands.
size() > 1)
6034 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
6035 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6036 if (ModRetainReleaseMarker) {
6042 ID->getString().split(ValueComp,
"#");
6043 if (ValueComp.
size() == 2) {
6044 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
6048 M.eraseNamedMetadata(ModRetainReleaseMarker);
6059 auto UpgradeToIntrinsic = [&](
const char *OldFunc,
6085 bool InvalidCast =
false;
6087 for (
unsigned I = 0, E = CI->
arg_size();
I != E; ++
I) {
6100 Arg = Builder.CreateBitCast(Arg, NewFuncTy->
getParamType(
I));
6102 Args.push_back(Arg);
6109 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6114 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->
getType());
6127 UpgradeToIntrinsic(
"clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6135 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6136 {
"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6137 {
"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6138 {
"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6139 {
"objc_autoreleaseReturnValue",
6140 llvm::Intrinsic::objc_autoreleaseReturnValue},
6141 {
"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6142 {
"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6143 {
"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6144 {
"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6145 {
"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6146 {
"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6147 {
"objc_release", llvm::Intrinsic::objc_release},
6148 {
"objc_retain", llvm::Intrinsic::objc_retain},
6149 {
"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6150 {
"objc_retainAutoreleaseReturnValue",
6151 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6152 {
"objc_retainAutoreleasedReturnValue",
6153 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6154 {
"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6155 {
"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6156 {
"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6157 {
"objc_unsafeClaimAutoreleasedReturnValue",
6158 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6159 {
"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6160 {
"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6161 {
"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6162 {
"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6163 {
"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6164 {
"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6165 {
"objc_arc_annotation_topdown_bbstart",
6166 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6167 {
"objc_arc_annotation_topdown_bbend",
6168 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6169 {
"objc_arc_annotation_bottomup_bbstart",
6170 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6171 {
"objc_arc_annotation_bottomup_bbend",
6172 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6174 for (
auto &
I : RuntimeFuncs)
6175 UpgradeToIntrinsic(
I.first,
I.second);
6179 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6183 bool HasObjCFlag =
false, HasClassProperties =
false,
Changed =
false;
6184 bool HasSwiftVersionFlag =
false;
6185 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6192 if (
Op->getNumOperands() != 3)
6206 if (
ID->getString() ==
"Objective-C Image Info Version")
6208 if (
ID->getString() ==
"Objective-C Class Properties")
6209 HasClassProperties =
true;
6211 if (
ID->getString() ==
"PIC Level") {
6212 if (
auto *Behavior =
6214 uint64_t V = Behavior->getLimitedValue();
6220 if (
ID->getString() ==
"PIE Level")
6221 if (
auto *Behavior =
6228 if (
ID->getString() ==
"branch-target-enforcement" ||
6229 ID->getString().starts_with(
"sign-return-address")) {
6230 if (
auto *Behavior =
6236 Op->getOperand(1),
Op->getOperand(2)};
6246 if (
ID->getString() ==
"Objective-C Image Info Section") {
6249 Value->getString().split(ValueComp,
" ");
6250 if (ValueComp.
size() != 1) {
6251 std::string NewValue;
6252 for (
auto &S : ValueComp)
6253 NewValue += S.str();
6264 if (
ID->getString() ==
"Objective-C Garbage Collection") {
6267 assert(Md->getValue() &&
"Expected non-empty metadata");
6268 auto Type = Md->getValue()->getType();
6271 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6272 if ((Val & 0xff) != Val) {
6273 HasSwiftVersionFlag =
true;
6274 SwiftABIVersion = (Val & 0xff00) >> 8;
6275 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6276 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6287 if (
ID->getString() ==
"amdgpu_code_object_version") {
6290 MDString::get(M.getContext(),
"amdhsa_code_object_version"),
6302 if (HasObjCFlag && !HasClassProperties) {
6308 if (HasSwiftVersionFlag) {
6312 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6314 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6322 auto TrimSpaces = [](
StringRef Section) -> std::string {
6324 Section.split(Components,
',');
6329 for (
auto Component : Components)
6330 OS <<
',' << Component.trim();
6335 for (
auto &GV : M.globals()) {
6336 if (!GV.hasSection())
6341 if (!Section.starts_with(
"__DATA, __objc_catlist"))
6346 GV.setSection(TrimSpaces(Section));
6362struct StrictFPUpgradeVisitor :
public InstVisitor<StrictFPUpgradeVisitor> {
6363 StrictFPUpgradeVisitor() =
default;
6366 if (!
Call.isStrictFP())
6372 Call.removeFnAttr(Attribute::StrictFP);
6373 Call.addFnAttr(Attribute::NoBuiltin);
6378struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6379 :
public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6380 AMDGPUUnsafeFPAtomicsUpgradeVisitor() =
default;
6382 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6397 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
6398 StrictFPUpgradeVisitor SFPV;
6403 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6404 F.getReturnType(),
F.getAttributes().getRetAttrs()));
6405 for (
auto &Arg :
F.args())
6407 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6409 bool AddingAttrs =
false, RemovingAttrs =
false;
6410 AttrBuilder AttrsToAdd(
F.getContext());
6415 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
6416 A.isValid() &&
A.isStringAttribute()) {
6417 F.setSection(
A.getValueAsString());
6419 RemovingAttrs =
true;
6423 A.isValid() &&
A.isStringAttribute()) {
6426 AddingAttrs = RemovingAttrs =
true;
6429 if (
Attribute A =
F.getFnAttribute(
"uniform-work-group-size");
6430 A.isValid() &&
A.isStringAttribute() && !
A.getValueAsString().empty()) {
6432 RemovingAttrs =
true;
6433 if (
A.getValueAsString() ==
"true") {
6434 AttrsToAdd.addAttribute(
"uniform-work-group-size");
6443 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
6446 if (
A.getValueAsBool()) {
6447 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6453 AttrsToRemove.
addAttribute(
"amdgpu-unsafe-fp-atomics");
6454 RemovingAttrs =
true;
6461 bool HandleDenormalMode =
false;
6463 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math"); Attr.isValid()) {
6466 DenormalFPMath = ParsedMode;
6468 AddingAttrs = RemovingAttrs =
true;
6469 HandleDenormalMode =
true;
6473 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math-f32");
6477 DenormalFPMathF32 = ParsedMode;
6479 AddingAttrs = RemovingAttrs =
true;
6480 HandleDenormalMode =
true;
6484 if (HandleDenormalMode)
6485 AttrsToAdd.addDenormalFPEnvAttr(
6489 F.removeFnAttrs(AttrsToRemove);
6492 F.addFnAttrs(AttrsToAdd);
6498 if (!
F.hasFnAttribute(FnAttrName))
6499 F.addFnAttr(FnAttrName,
Value);
6506 if (!
F.hasFnAttribute(FnAttrName)) {
6508 F.addFnAttr(FnAttrName);
6510 auto A =
F.getFnAttribute(FnAttrName);
6511 if (
"false" ==
A.getValueAsString())
6512 F.removeFnAttr(FnAttrName);
6513 else if (
"true" ==
A.getValueAsString()) {
6514 F.removeFnAttr(FnAttrName);
6515 F.addFnAttr(FnAttrName);
6521 Triple T(M.getTargetTriple());
6522 if (!
T.isThumb() && !
T.isARM() && !
T.isAArch64())
6532 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6536 if (
Op->getNumOperands() != 3)
6545 uint64_t *ValPtr = IDStr ==
"branch-target-enforcement" ? &BTEValue
6546 : IDStr ==
"branch-protection-pauth-lr" ? &BPPLRValue
6547 : IDStr ==
"guarded-control-stack" ? &GCSValue
6548 : IDStr ==
"sign-return-address" ? &SRAValue
6549 : IDStr ==
"sign-return-address-all" ? &SRAALLValue
6550 : IDStr ==
"sign-return-address-with-bkey"
6556 *ValPtr = CI->getZExtValue();
6562 bool BTE = BTEValue == 1;
6563 bool BPPLR = BPPLRValue == 1;
6564 bool GCS = GCSValue == 1;
6565 bool SRA = SRAValue == 1;
6568 if (SRA && SRAALLValue == 1)
6569 SignTypeValue =
"all";
6572 if (SRA && SRABKeyValue == 1)
6573 SignKeyValue =
"b_key";
6575 for (
Function &
F : M.getFunctionList()) {
6576 if (
F.isDeclaration())
6583 if (
auto A =
F.getFnAttribute(
"sign-return-address");
6584 A.isValid() &&
"none" ==
A.getValueAsString()) {
6585 F.removeFnAttr(
"sign-return-address");
6586 F.removeFnAttr(
"sign-return-address-key");
6602 if (SRAALLValue == 1)
6604 if (SRABKeyValue == 1)
6613 if (
T->getNumOperands() < 1)
6618 return S->getString().starts_with(
"llvm.vectorizer.");
6622 StringRef OldPrefix =
"llvm.vectorizer.";
6625 if (OldTag ==
"llvm.vectorizer.unroll")
6637 if (
T->getNumOperands() < 1)
6642 if (!OldTag->getString().starts_with(
"llvm.vectorizer."))
6647 Ops.reserve(
T->getNumOperands());
6649 for (
unsigned I = 1,
E =
T->getNumOperands();
I !=
E; ++
I)
6650 Ops.push_back(
T->getOperand(
I));
6664 Ops.reserve(
T->getNumOperands());
6675 if ((
T.isSPIR() || (
T.isSPIRV() && !
T.isSPIRVLogical())) &&
6676 !
DL.contains(
"-G") && !
DL.starts_with(
"G")) {
6677 return DL.empty() ? std::string(
"G1") : (
DL +
"-G1").str();
6680 if (
T.isLoongArch64() ||
T.isRISCV64()) {
6682 auto I =
DL.find(
"-n64-");
6684 return (
DL.take_front(
I) +
"-n32:64-" +
DL.drop_front(
I + 5)).str();
6689 std::string Res =
DL.str();
6692 if (!
DL.contains(
"-G") && !
DL.starts_with(
"G"))
6693 Res.append(Res.empty() ?
"G1" :
"-G1");
6701 if (!
DL.contains(
"-ni") && !
DL.starts_with(
"ni"))
6702 Res.append(
"-ni:7:8:9");
6704 if (
DL.ends_with(
"ni:7"))
6706 if (
DL.ends_with(
"ni:7:8"))
6711 if (!
DL.contains(
"-p7") && !
DL.starts_with(
"p7"))
6712 Res.append(
"-p7:160:256:256:32");
6713 if (!
DL.contains(
"-p8") && !
DL.starts_with(
"p8"))
6714 Res.append(
"-p8:128:128:128:48");
6715 constexpr StringRef OldP8(
"-p8:128:128-");
6716 if (
DL.contains(OldP8))
6717 Res.replace(Res.find(OldP8), OldP8.
size(),
"-p8:128:128:128:48-");
6718 if (!
DL.contains(
"-p9") && !
DL.starts_with(
"p9"))
6719 Res.append(
"-p9:192:256:256:32");
6723 if (!
DL.contains(
"m:e"))
6724 Res = Res.empty() ?
"m:e" :
"m:e-" + Res;
6729 if (
T.isSystemZ() && !
DL.empty()) {
6731 if (!
DL.contains(
"-S64"))
6732 return "E-S64" +
DL.drop_front(1).str();
6736 auto AddPtr32Ptr64AddrSpaces = [&
DL, &Res]() {
6739 StringRef AddrSpaces{
"-p270:32:32-p271:32:32-p272:64:64"};
6740 if (!
DL.contains(AddrSpaces)) {
6742 Regex R(
"^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6743 if (R.match(Res, &
Groups))
6749 if (
T.isAArch64()) {
6751 if (!
DL.empty() && !
DL.contains(
"-Fn32"))
6752 Res.append(
"-Fn32");
6753 AddPtr32Ptr64AddrSpaces();
6757 if (
T.isSPARC() || (
T.isMIPS64() && !
DL.contains(
"m:m")) ||
T.isPPC64() ||
6761 std::string I64 =
"-i64:64";
6762 std::string I128 =
"-i128:128";
6764 size_t Pos = Res.find(I64);
6765 if (Pos !=
size_t(-1))
6766 Res.insert(Pos + I64.size(), I128);
6770 if (
T.isPPC() &&
T.isOSAIX() && !
DL.contains(
"f64:32:64") && !
DL.empty()) {
6771 size_t Pos = Res.find(
"-S128");
6774 Res.insert(Pos,
"-f64:32:64");
6780 AddPtr32Ptr64AddrSpaces();
6788 if (!
T.isOSIAMCU()) {
6789 std::string I128 =
"-i128:128";
6792 Regex R(
"^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6793 if (R.match(Res, &
Groups))
6801 if (
T.isWindowsMSVCEnvironment() && !
T.isArch64Bit()) {
6803 auto I =
Ref.find(
"-f80:32-");
6805 Res = (
Ref.take_front(
I) +
"-f80:128-" +
Ref.drop_front(
I + 8)).str();
6813 Attribute A =
B.getAttribute(
"no-frame-pointer-elim");
6816 FramePointer =
A.getValueAsString() ==
"true" ?
"all" :
"none";
6817 B.removeAttribute(
"no-frame-pointer-elim");
6819 if (
B.contains(
"no-frame-pointer-elim-non-leaf")) {
6821 if (FramePointer !=
"all")
6822 FramePointer =
"non-leaf";
6823 B.removeAttribute(
"no-frame-pointer-elim-non-leaf");
6825 if (!FramePointer.
empty())
6826 B.addAttribute(
"frame-pointer", FramePointer);
6828 A =
B.getAttribute(
"null-pointer-is-valid");
6831 bool NullPointerIsValid =
A.getValueAsString() ==
"true";
6832 B.removeAttribute(
"null-pointer-is-valid");
6833 if (NullPointerIsValid)
6834 B.addAttribute(Attribute::NullPointerIsValid);
6837 A =
B.getAttribute(
"uniform-work-group-size");
6841 bool IsTrue = Val ==
"true";
6842 B.removeAttribute(
"uniform-work-group-size");
6844 B.addAttribute(
"uniform-work-group-size");
6855 return OBD.
getTag() ==
"clang.arc.attachedcall" &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
LLVM_ABI SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
LLVM_ABI bool hasStructReturnType(ID id)
Returns true if id has a struct return type.
@ ADDRESS_SPACE_SHARED_CLUSTER
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ Default
The result value is uniform if and only if all operands are uniform.
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.