34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
62 cl::desc(
"Disable autoupgrade of debug info"));
72 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
87 Type *LastArgType =
F->getFunctionType()->getParamType(
88 F->getFunctionType()->getNumParams() - 1);
103 if (
F->getReturnType()->isVectorTy())
116 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
117 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
134 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
135 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
149 if (
F->getReturnType()->getScalarType()->isBFloatTy())
159 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
173 if (Name.consume_front(
"avx."))
174 return (Name.starts_with(
"blend.p") ||
175 Name ==
"cvt.ps2.pd.256" ||
176 Name ==
"cvtdq2.pd.256" ||
177 Name ==
"cvtdq2.ps.256" ||
178 Name.starts_with(
"movnt.") ||
179 Name.starts_with(
"sqrt.p") ||
180 Name.starts_with(
"storeu.") ||
181 Name.starts_with(
"vbroadcast.s") ||
182 Name.starts_with(
"vbroadcastf128") ||
183 Name.starts_with(
"vextractf128.") ||
184 Name.starts_with(
"vinsertf128.") ||
185 Name.starts_with(
"vperm2f128.") ||
186 Name.starts_with(
"vpermil."));
188 if (Name.consume_front(
"avx2."))
189 return (Name ==
"movntdqa" ||
190 Name.starts_with(
"pabs.") ||
191 Name.starts_with(
"padds.") ||
192 Name.starts_with(
"paddus.") ||
193 Name.starts_with(
"pblendd.") ||
195 Name.starts_with(
"pbroadcast") ||
196 Name.starts_with(
"pcmpeq.") ||
197 Name.starts_with(
"pcmpgt.") ||
198 Name.starts_with(
"pmax") ||
199 Name.starts_with(
"pmin") ||
200 Name.starts_with(
"pmovsx") ||
201 Name.starts_with(
"pmovzx") ||
203 Name ==
"pmulu.dq" ||
204 Name.starts_with(
"psll.dq") ||
205 Name.starts_with(
"psrl.dq") ||
206 Name.starts_with(
"psubs.") ||
207 Name.starts_with(
"psubus.") ||
208 Name.starts_with(
"vbroadcast") ||
209 Name ==
"vbroadcasti128" ||
210 Name ==
"vextracti128" ||
211 Name ==
"vinserti128" ||
212 Name ==
"vperm2i128");
214 if (Name.consume_front(
"avx512.")) {
215 if (Name.consume_front(
"mask."))
217 return (Name.starts_with(
"add.p") ||
218 Name.starts_with(
"and.") ||
219 Name.starts_with(
"andn.") ||
220 Name.starts_with(
"broadcast.s") ||
221 Name.starts_with(
"broadcastf32x4.") ||
222 Name.starts_with(
"broadcastf32x8.") ||
223 Name.starts_with(
"broadcastf64x2.") ||
224 Name.starts_with(
"broadcastf64x4.") ||
225 Name.starts_with(
"broadcasti32x4.") ||
226 Name.starts_with(
"broadcasti32x8.") ||
227 Name.starts_with(
"broadcasti64x2.") ||
228 Name.starts_with(
"broadcasti64x4.") ||
229 Name.starts_with(
"cmp.b") ||
230 Name.starts_with(
"cmp.d") ||
231 Name.starts_with(
"cmp.q") ||
232 Name.starts_with(
"cmp.w") ||
233 Name.starts_with(
"compress.b") ||
234 Name.starts_with(
"compress.d") ||
235 Name.starts_with(
"compress.p") ||
236 Name.starts_with(
"compress.q") ||
237 Name.starts_with(
"compress.store.") ||
238 Name.starts_with(
"compress.w") ||
239 Name.starts_with(
"conflict.") ||
240 Name.starts_with(
"cvtdq2pd.") ||
241 Name.starts_with(
"cvtdq2ps.") ||
242 Name ==
"cvtpd2dq.256" ||
243 Name ==
"cvtpd2ps.256" ||
244 Name ==
"cvtps2pd.128" ||
245 Name ==
"cvtps2pd.256" ||
246 Name.starts_with(
"cvtqq2pd.") ||
247 Name ==
"cvtqq2ps.256" ||
248 Name ==
"cvtqq2ps.512" ||
249 Name ==
"cvttpd2dq.256" ||
250 Name ==
"cvttps2dq.128" ||
251 Name ==
"cvttps2dq.256" ||
252 Name.starts_with(
"cvtudq2pd.") ||
253 Name.starts_with(
"cvtudq2ps.") ||
254 Name.starts_with(
"cvtuqq2pd.") ||
255 Name ==
"cvtuqq2ps.256" ||
256 Name ==
"cvtuqq2ps.512" ||
257 Name.starts_with(
"dbpsadbw.") ||
258 Name.starts_with(
"div.p") ||
259 Name.starts_with(
"expand.b") ||
260 Name.starts_with(
"expand.d") ||
261 Name.starts_with(
"expand.load.") ||
262 Name.starts_with(
"expand.p") ||
263 Name.starts_with(
"expand.q") ||
264 Name.starts_with(
"expand.w") ||
265 Name.starts_with(
"fpclass.p") ||
266 Name.starts_with(
"insert") ||
267 Name.starts_with(
"load.") ||
268 Name.starts_with(
"loadu.") ||
269 Name.starts_with(
"lzcnt.") ||
270 Name.starts_with(
"max.p") ||
271 Name.starts_with(
"min.p") ||
272 Name.starts_with(
"movddup") ||
273 Name.starts_with(
"move.s") ||
274 Name.starts_with(
"movshdup") ||
275 Name.starts_with(
"movsldup") ||
276 Name.starts_with(
"mul.p") ||
277 Name.starts_with(
"or.") ||
278 Name.starts_with(
"pabs.") ||
279 Name.starts_with(
"packssdw.") ||
280 Name.starts_with(
"packsswb.") ||
281 Name.starts_with(
"packusdw.") ||
282 Name.starts_with(
"packuswb.") ||
283 Name.starts_with(
"padd.") ||
284 Name.starts_with(
"padds.") ||
285 Name.starts_with(
"paddus.") ||
286 Name.starts_with(
"palignr.") ||
287 Name.starts_with(
"pand.") ||
288 Name.starts_with(
"pandn.") ||
289 Name.starts_with(
"pavg") ||
290 Name.starts_with(
"pbroadcast") ||
291 Name.starts_with(
"pcmpeq.") ||
292 Name.starts_with(
"pcmpgt.") ||
293 Name.starts_with(
"perm.df.") ||
294 Name.starts_with(
"perm.di.") ||
295 Name.starts_with(
"permvar.") ||
296 Name.starts_with(
"pmaddubs.w.") ||
297 Name.starts_with(
"pmaddw.d.") ||
298 Name.starts_with(
"pmax") ||
299 Name.starts_with(
"pmin") ||
300 Name ==
"pmov.qd.256" ||
301 Name ==
"pmov.qd.512" ||
302 Name ==
"pmov.wb.256" ||
303 Name ==
"pmov.wb.512" ||
304 Name.starts_with(
"pmovsx") ||
305 Name.starts_with(
"pmovzx") ||
306 Name.starts_with(
"pmul.dq.") ||
307 Name.starts_with(
"pmul.hr.sw.") ||
308 Name.starts_with(
"pmulh.w.") ||
309 Name.starts_with(
"pmulhu.w.") ||
310 Name.starts_with(
"pmull.") ||
311 Name.starts_with(
"pmultishift.qb.") ||
312 Name.starts_with(
"pmulu.dq.") ||
313 Name.starts_with(
"por.") ||
314 Name.starts_with(
"prol.") ||
315 Name.starts_with(
"prolv.") ||
316 Name.starts_with(
"pror.") ||
317 Name.starts_with(
"prorv.") ||
318 Name.starts_with(
"pshuf.b.") ||
319 Name.starts_with(
"pshuf.d.") ||
320 Name.starts_with(
"pshufh.w.") ||
321 Name.starts_with(
"pshufl.w.") ||
322 Name.starts_with(
"psll.d") ||
323 Name.starts_with(
"psll.q") ||
324 Name.starts_with(
"psll.w") ||
325 Name.starts_with(
"pslli") ||
326 Name.starts_with(
"psllv") ||
327 Name.starts_with(
"psra.d") ||
328 Name.starts_with(
"psra.q") ||
329 Name.starts_with(
"psra.w") ||
330 Name.starts_with(
"psrai") ||
331 Name.starts_with(
"psrav") ||
332 Name.starts_with(
"psrl.d") ||
333 Name.starts_with(
"psrl.q") ||
334 Name.starts_with(
"psrl.w") ||
335 Name.starts_with(
"psrli") ||
336 Name.starts_with(
"psrlv") ||
337 Name.starts_with(
"psub.") ||
338 Name.starts_with(
"psubs.") ||
339 Name.starts_with(
"psubus.") ||
340 Name.starts_with(
"pternlog.") ||
341 Name.starts_with(
"punpckh") ||
342 Name.starts_with(
"punpckl") ||
343 Name.starts_with(
"pxor.") ||
344 Name.starts_with(
"shuf.f") ||
345 Name.starts_with(
"shuf.i") ||
346 Name.starts_with(
"shuf.p") ||
347 Name.starts_with(
"sqrt.p") ||
348 Name.starts_with(
"store.b.") ||
349 Name.starts_with(
"store.d.") ||
350 Name.starts_with(
"store.p") ||
351 Name.starts_with(
"store.q.") ||
352 Name.starts_with(
"store.w.") ||
353 Name ==
"store.ss" ||
354 Name.starts_with(
"storeu.") ||
355 Name.starts_with(
"sub.p") ||
356 Name.starts_with(
"ucmp.") ||
357 Name.starts_with(
"unpckh.") ||
358 Name.starts_with(
"unpckl.") ||
359 Name.starts_with(
"valign.") ||
360 Name ==
"vcvtph2ps.128" ||
361 Name ==
"vcvtph2ps.256" ||
362 Name.starts_with(
"vextract") ||
363 Name.starts_with(
"vfmadd.") ||
364 Name.starts_with(
"vfmaddsub.") ||
365 Name.starts_with(
"vfnmadd.") ||
366 Name.starts_with(
"vfnmsub.") ||
367 Name.starts_with(
"vpdpbusd.") ||
368 Name.starts_with(
"vpdpbusds.") ||
369 Name.starts_with(
"vpdpwssd.") ||
370 Name.starts_with(
"vpdpwssds.") ||
371 Name.starts_with(
"vpermi2var.") ||
372 Name.starts_with(
"vpermil.p") ||
373 Name.starts_with(
"vpermilvar.") ||
374 Name.starts_with(
"vpermt2var.") ||
375 Name.starts_with(
"vpmadd52") ||
376 Name.starts_with(
"vpshld.") ||
377 Name.starts_with(
"vpshldv.") ||
378 Name.starts_with(
"vpshrd.") ||
379 Name.starts_with(
"vpshrdv.") ||
380 Name.starts_with(
"vpshufbitqmb.") ||
381 Name.starts_with(
"xor."));
383 if (Name.consume_front(
"mask3."))
385 return (Name.starts_with(
"vfmadd.") ||
386 Name.starts_with(
"vfmaddsub.") ||
387 Name.starts_with(
"vfmsub.") ||
388 Name.starts_with(
"vfmsubadd.") ||
389 Name.starts_with(
"vfnmsub."));
391 if (Name.consume_front(
"maskz."))
393 return (Name.starts_with(
"pternlog.") ||
394 Name.starts_with(
"vfmadd.") ||
395 Name.starts_with(
"vfmaddsub.") ||
396 Name.starts_with(
"vpdpbusd.") ||
397 Name.starts_with(
"vpdpbusds.") ||
398 Name.starts_with(
"vpdpwssd.") ||
399 Name.starts_with(
"vpdpwssds.") ||
400 Name.starts_with(
"vpermt2var.") ||
401 Name.starts_with(
"vpmadd52") ||
402 Name.starts_with(
"vpshldv.") ||
403 Name.starts_with(
"vpshrdv."));
406 return (Name ==
"movntdqa" ||
407 Name ==
"pmul.dq.512" ||
408 Name ==
"pmulu.dq.512" ||
409 Name.starts_with(
"broadcastm") ||
410 Name.starts_with(
"cmp.p") ||
411 Name.starts_with(
"cvtb2mask.") ||
412 Name.starts_with(
"cvtd2mask.") ||
413 Name.starts_with(
"cvtmask2") ||
414 Name.starts_with(
"cvtq2mask.") ||
415 Name ==
"cvtusi2sd" ||
416 Name.starts_with(
"cvtw2mask.") ||
421 Name ==
"kortestc.w" ||
422 Name ==
"kortestz.w" ||
423 Name.starts_with(
"kunpck") ||
426 Name.starts_with(
"padds.") ||
427 Name.starts_with(
"pbroadcast") ||
428 Name.starts_with(
"prol") ||
429 Name.starts_with(
"pror") ||
430 Name.starts_with(
"psll.dq") ||
431 Name.starts_with(
"psrl.dq") ||
432 Name.starts_with(
"psubs.") ||
433 Name.starts_with(
"ptestm") ||
434 Name.starts_with(
"ptestnm") ||
435 Name.starts_with(
"storent.") ||
436 Name.starts_with(
"vbroadcast.s") ||
437 Name.starts_with(
"vpshld.") ||
438 Name.starts_with(
"vpshrd."));
441 if (Name.consume_front(
"fma."))
442 return (Name.starts_with(
"vfmadd.") ||
443 Name.starts_with(
"vfmsub.") ||
444 Name.starts_with(
"vfmsubadd.") ||
445 Name.starts_with(
"vfnmadd.") ||
446 Name.starts_with(
"vfnmsub."));
448 if (Name.consume_front(
"fma4."))
449 return Name.starts_with(
"vfmadd.s");
451 if (Name.consume_front(
"sse."))
452 return (Name ==
"add.ss" ||
453 Name ==
"cvtsi2ss" ||
454 Name ==
"cvtsi642ss" ||
457 Name.starts_with(
"sqrt.p") ||
459 Name.starts_with(
"storeu.") ||
462 if (Name.consume_front(
"sse2."))
463 return (Name ==
"add.sd" ||
464 Name ==
"cvtdq2pd" ||
465 Name ==
"cvtdq2ps" ||
466 Name ==
"cvtps2pd" ||
467 Name ==
"cvtsi2sd" ||
468 Name ==
"cvtsi642sd" ||
469 Name ==
"cvtss2sd" ||
472 Name.starts_with(
"padds.") ||
473 Name.starts_with(
"paddus.") ||
474 Name.starts_with(
"pcmpeq.") ||
475 Name.starts_with(
"pcmpgt.") ||
480 Name ==
"pmulu.dq" ||
481 Name.starts_with(
"pshuf") ||
482 Name.starts_with(
"psll.dq") ||
483 Name.starts_with(
"psrl.dq") ||
484 Name.starts_with(
"psubs.") ||
485 Name.starts_with(
"psubus.") ||
486 Name.starts_with(
"sqrt.p") ||
488 Name ==
"storel.dq" ||
489 Name.starts_with(
"storeu.") ||
492 if (Name.consume_front(
"sse41."))
493 return (Name.starts_with(
"blendp") ||
494 Name ==
"movntdqa" ||
504 Name.starts_with(
"pmovsx") ||
505 Name.starts_with(
"pmovzx") ||
508 if (Name.consume_front(
"sse42."))
509 return Name ==
"crc32.64.8";
511 if (Name.consume_front(
"sse4a."))
512 return Name.starts_with(
"movnt.");
514 if (Name.consume_front(
"ssse3."))
515 return (Name ==
"pabs.b.128" ||
516 Name ==
"pabs.d.128" ||
517 Name ==
"pabs.w.128");
519 if (Name.consume_front(
"xop."))
520 return (Name ==
"vpcmov" ||
521 Name ==
"vpcmov.256" ||
522 Name.starts_with(
"vpcom") ||
523 Name.starts_with(
"vprot"));
525 return (Name ==
"addcarry.u32" ||
526 Name ==
"addcarry.u64" ||
527 Name ==
"addcarryx.u32" ||
528 Name ==
"addcarryx.u64" ||
529 Name ==
"subborrow.u32" ||
530 Name ==
"subborrow.u64" ||
531 Name.starts_with(
"vcvtph2ps."));
537 if (!Name.consume_front(
"x86."))
545 if (Name ==
"rdtscp") {
547 if (
F->getFunctionType()->getNumParams() == 0)
552 Intrinsic::x86_rdtscp);
559 if (Name.consume_front(
"sse41.ptest")) {
561 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
562 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
563 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
576 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
577 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
578 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
579 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
580 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
581 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
586 if (Name.consume_front(
"avx512.")) {
587 if (Name.consume_front(
"mask.cmp.")) {
590 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
591 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
592 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
593 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
594 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
595 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
599 }
else if (Name.starts_with(
"vpdpbusd.") ||
600 Name.starts_with(
"vpdpbusds.")) {
603 .
Case(
"vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
604 .
Case(
"vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
605 .
Case(
"vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
606 .
Case(
"vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
607 .
Case(
"vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
608 .
Case(
"vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
612 }
else if (Name.starts_with(
"vpdpwssd.") ||
613 Name.starts_with(
"vpdpwssds.")) {
616 .
Case(
"vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
617 .
Case(
"vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
618 .
Case(
"vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
619 .
Case(
"vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
620 .
Case(
"vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
621 .
Case(
"vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
629 if (Name.consume_front(
"avx2.")) {
630 if (Name.consume_front(
"vpdpb")) {
633 .
Case(
"ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
634 .
Case(
"ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
635 .
Case(
"ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
636 .
Case(
"ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
637 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
638 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
639 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
640 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
641 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
642 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
643 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
644 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
648 }
else if (Name.consume_front(
"vpdpw")) {
651 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
652 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
653 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
654 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
655 .
Case(
"usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
656 .
Case(
"usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
657 .
Case(
"usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
658 .
Case(
"usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
659 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
660 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
661 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
662 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
670 if (Name.consume_front(
"avx10.")) {
671 if (Name.consume_front(
"vpdpb")) {
674 .
Case(
"ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
675 .
Case(
"ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
676 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
677 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
678 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
679 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
683 }
else if (Name.consume_front(
"vpdpw")) {
685 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
686 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
687 .
Case(
"usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
688 .
Case(
"usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
689 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
690 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
698 if (Name.consume_front(
"avx512bf16.")) {
701 .
Case(
"cvtne2ps2bf16.128",
702 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
703 .
Case(
"cvtne2ps2bf16.256",
704 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
705 .
Case(
"cvtne2ps2bf16.512",
706 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
707 .
Case(
"mask.cvtneps2bf16.128",
708 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
709 .
Case(
"cvtneps2bf16.256",
710 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
711 .
Case(
"cvtneps2bf16.512",
712 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
719 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
720 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
721 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
728 if (Name.consume_front(
"xop.")) {
730 if (Name.starts_with(
"vpermil2")) {
733 auto Idx =
F->getFunctionType()->getParamType(2);
734 if (Idx->isFPOrFPVectorTy()) {
735 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
736 unsigned EltSize = Idx->getScalarSizeInBits();
737 if (EltSize == 64 && IdxSize == 128)
738 ID = Intrinsic::x86_xop_vpermil2pd;
739 else if (EltSize == 32 && IdxSize == 128)
740 ID = Intrinsic::x86_xop_vpermil2ps;
741 else if (EltSize == 64 && IdxSize == 256)
742 ID = Intrinsic::x86_xop_vpermil2pd_256;
744 ID = Intrinsic::x86_xop_vpermil2ps_256;
746 }
else if (
F->arg_size() == 2)
749 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
750 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
761 if (Name ==
"seh.recoverfp") {
763 Intrinsic::eh_recoverfp);
775 if (Name.starts_with(
"rbit")) {
778 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
782 if (Name ==
"thread.pointer") {
785 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
789 bool Neon = Name.consume_front(
"neon.");
794 if (Name.consume_front(
"bfdot.")) {
798 .
Cases({
"v2f32.v8i8",
"v4f32.v16i8"},
803 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
804 assert((OperandWidth == 64 || OperandWidth == 128) &&
805 "Unexpected operand width");
807 std::array<Type *, 2> Tys{
818 if (Name.consume_front(
"bfm")) {
820 if (Name.consume_back(
".v4f32.v16i8")) {
866 F->arg_begin()->getType());
870 if (Name.consume_front(
"vst")) {
872 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
876 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
877 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
880 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
881 Intrinsic::arm_neon_vst4lane};
883 auto fArgs =
F->getFunctionType()->params();
884 Type *Tys[] = {fArgs[0], fArgs[1]};
887 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
890 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
899 if (Name.consume_front(
"mve.")) {
901 if (Name ==
"vctp64") {
911 if (Name.starts_with(
"vrintn.v")) {
913 F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
918 if (Name.consume_back(
".v4i1")) {
920 if (Name.consume_back(
".predicated.v2i64.v4i32"))
922 return Name ==
"mull.int" || Name ==
"vqdmull";
924 if (Name.consume_back(
".v2i64")) {
926 bool IsGather = Name.consume_front(
"vldr.gather.");
927 if (IsGather || Name.consume_front(
"vstr.scatter.")) {
928 if (Name.consume_front(
"base.")) {
930 Name.consume_front(
"wb.");
933 return Name ==
"predicated.v2i64";
936 if (Name.consume_front(
"offset.predicated."))
937 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
938 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
951 if (Name.consume_front(
"cde.vcx")) {
953 if (Name.consume_back(
".predicated.v2i64.v4i1"))
955 return Name ==
"1q" || Name ==
"1qa" || Name ==
"2q" || Name ==
"2qa" ||
956 Name ==
"3q" || Name ==
"3qa";
970 F->arg_begin()->getType());
974 if (Name.starts_with(
"addp")) {
976 if (
F->arg_size() != 2)
979 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
981 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
987 if (Name.starts_with(
"bfcvt")) {
994 if (Name.consume_front(
"sve.")) {
996 if (Name.consume_front(
"bf")) {
997 if (Name.consume_back(
".lane")) {
1001 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1002 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1003 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1015 if (Name ==
"fcvt.bf16f32" || Name ==
"fcvtnt.bf16f32") {
1020 if (Name.consume_front(
"addqv")) {
1022 if (!
F->getReturnType()->isFPOrFPVectorTy())
1025 auto Args =
F->getFunctionType()->params();
1026 Type *Tys[] = {
F->getReturnType(), Args[1]};
1028 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1032 if (Name.consume_front(
"ld")) {
1034 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
1035 if (LdRegex.
match(Name)) {
1042 Intrinsic::aarch64_sve_ld2_sret,
1043 Intrinsic::aarch64_sve_ld3_sret,
1044 Intrinsic::aarch64_sve_ld4_sret,
1047 LoadIDs[Name[0] -
'2'], Ty);
1053 if (Name.consume_front(
"tuple.")) {
1055 if (Name.starts_with(
"get")) {
1057 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
1059 F->getParent(), Intrinsic::vector_extract, Tys);
1063 if (Name.starts_with(
"set")) {
1065 auto Args =
F->getFunctionType()->params();
1066 Type *Tys[] = {Args[0], Args[2], Args[1]};
1068 F->getParent(), Intrinsic::vector_insert, Tys);
1072 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
1073 if (CreateTupleRegex.
match(Name)) {
1075 auto Args =
F->getFunctionType()->params();
1076 Type *Tys[] = {
F->getReturnType(), Args[1]};
1078 F->getParent(), Intrinsic::vector_insert, Tys);
1084 if (Name.starts_with(
"rev.nxv")) {
1087 F->getParent(), Intrinsic::vector_reverse,
F->getReturnType());
1099 if (Name.consume_front(
"cp.async.bulk.tensor.g2s.")) {
1103 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1105 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1107 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1108 .
Case(
"tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1109 .
Case(
"tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1110 .
Case(
"tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1111 .
Case(
"tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1112 .
Case(
"tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1121 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1135 size_t FlagStartIndex =
F->getFunctionType()->getNumParams() - 3;
1136 Type *ArgType =
F->getFunctionType()->getParamType(FlagStartIndex);
1146 if (Name.consume_front(
"mapa.shared.cluster"))
1147 if (
F->getReturnType()->getPointerAddressSpace() ==
1149 return Intrinsic::nvvm_mapa_shared_cluster;
1151 if (Name.consume_front(
"cp.async.bulk.")) {
1154 .
Case(
"global.to.shared.cluster",
1155 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1156 .
Case(
"shared.cta.to.cluster",
1157 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1161 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1170 if (Name.consume_front(
"fma.rn."))
1172 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
1173 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1174 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1175 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1178 if (Name.consume_front(
"fmax."))
1180 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
1181 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1182 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1183 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1184 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1185 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1186 .
Case(
"ftz.nan.xorsign.abs.bf16",
1187 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1188 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1189 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1190 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1191 .
Case(
"ftz.xorsign.abs.bf16x2",
1192 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1193 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1194 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1195 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1196 .
Case(
"nan.xorsign.abs.bf16x2",
1197 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1198 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1199 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1202 if (Name.consume_front(
"fmin."))
1204 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
1205 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1206 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1207 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1208 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1209 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1210 .
Case(
"ftz.nan.xorsign.abs.bf16",
1211 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1212 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1213 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1214 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1215 .
Case(
"ftz.xorsign.abs.bf16x2",
1216 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1217 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1218 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1219 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1220 .
Case(
"nan.xorsign.abs.bf16x2",
1221 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1222 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1223 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1226 if (Name.consume_front(
"neg."))
1228 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1229 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1236 return Name.consume_front(
"local") || Name.consume_front(
"shared") ||
1237 Name.consume_front(
"global") || Name.consume_front(
"constant") ||
1238 Name.consume_front(
"param");
1244 if (Name.starts_with(
"to.fp16")) {
1248 FuncTy->getReturnType());
1251 if (Name.starts_with(
"from.fp16")) {
1255 FuncTy->getReturnType());
1262 bool CanUpgradeDebugIntrinsicsToRecords) {
1263 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1268 if (!Name.consume_front(
"llvm.") || Name.empty())
1274 bool IsArm = Name.consume_front(
"arm.");
1275 if (IsArm || Name.consume_front(
"aarch64.")) {
1281 if (Name.consume_front(
"amdgcn.")) {
1282 if (Name ==
"alignbit") {
1285 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1289 if (Name.consume_front(
"atomic.")) {
1290 if (Name.starts_with(
"inc") || Name.starts_with(
"dec") ||
1291 Name.starts_with(
"cond.sub") || Name.starts_with(
"csub")) {
1301 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
1302 F->arg_size() == 7) {
1306 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
1307 F->arg_size() == 8) {
1312 if (Name.consume_front(
"ds.") || Name.consume_front(
"global.atomic.") ||
1313 Name.consume_front(
"flat.atomic.")) {
1314 if (Name.starts_with(
"fadd") ||
1316 (Name.starts_with(
"fmin") && !Name.starts_with(
"fmin.num")) ||
1317 (Name.starts_with(
"fmax") && !Name.starts_with(
"fmax.num"))) {
1325 if (Name.starts_with(
"ldexp.")) {
1328 F->getParent(), Intrinsic::ldexp,
1329 {F->getReturnType(), F->getArg(1)->getType()});
1338 if (
F->arg_size() == 1) {
1339 if (Name.consume_front(
"convert.")) {
1353 F->arg_begin()->getType());
1358 if (
F->arg_size() == 2 && Name ==
"coro.end") {
1361 Intrinsic::coro_end);
1368 if (Name.consume_front(
"dbg.")) {
1370 if (CanUpgradeDebugIntrinsicsToRecords) {
1371 if (Name ==
"addr" || Name ==
"value" || Name ==
"assign" ||
1372 Name ==
"declare" || Name ==
"label") {
1381 if (Name ==
"addr" || (Name ==
"value" &&
F->arg_size() == 4)) {
1384 Intrinsic::dbg_value);
1391 if (Name.consume_front(
"experimental.vector.")) {
1397 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1398 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1399 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1400 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1401 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1403 Intrinsic::vector_partial_reduce_add)
1406 const auto *FT =
F->getFunctionType();
1408 if (
ID == Intrinsic::vector_extract ||
1409 ID == Intrinsic::vector_interleave2)
1412 if (
ID != Intrinsic::vector_interleave2)
1414 if (
ID == Intrinsic::vector_insert ||
1415 ID == Intrinsic::vector_partial_reduce_add)
1423 if (Name.consume_front(
"reduce.")) {
1425 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1426 if (R.match(Name, &
Groups))
1428 .
Case(
"add", Intrinsic::vector_reduce_add)
1429 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1430 .
Case(
"and", Intrinsic::vector_reduce_and)
1431 .
Case(
"or", Intrinsic::vector_reduce_or)
1432 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1433 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1434 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1435 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1436 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1437 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1438 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1443 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1448 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1449 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1454 auto Args =
F->getFunctionType()->params();
1456 {Args[V2 ? 1 : 0]});
1462 if (Name.consume_front(
"splice"))
1466 if (Name.consume_front(
"experimental.stepvector.")) {
1470 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1475 if (Name.starts_with(
"flt.rounds")) {
1478 Intrinsic::get_rounding);
1483 if (Name.starts_with(
"invariant.group.barrier")) {
1485 auto Args =
F->getFunctionType()->params();
1486 Type* ObjectPtr[1] = {Args[0]};
1489 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1494 if ((Name.starts_with(
"lifetime.start") ||
1495 Name.starts_with(
"lifetime.end")) &&
1496 F->arg_size() == 2) {
1498 ? Intrinsic::lifetime_start
1499 : Intrinsic::lifetime_end;
1502 F->getArg(0)->getType());
1511 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1512 .StartsWith(
"memmove.", Intrinsic::memmove)
1514 if (
F->arg_size() == 5) {
1518 F->getFunctionType()->params().slice(0, 3);
1524 if (Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1527 const auto *FT =
F->getFunctionType();
1528 Type *ParamTypes[2] = {
1529 FT->getParamType(0),
1533 Intrinsic::memset, ParamTypes);
1539 .
StartsWith(
"masked.load", Intrinsic::masked_load)
1540 .
StartsWith(
"masked.gather", Intrinsic::masked_gather)
1541 .
StartsWith(
"masked.store", Intrinsic::masked_store)
1542 .
StartsWith(
"masked.scatter", Intrinsic::masked_scatter)
1544 if (MaskedID &&
F->arg_size() == 4) {
1546 if (MaskedID == Intrinsic::masked_load ||
1547 MaskedID == Intrinsic::masked_gather) {
1549 F->getParent(), MaskedID,
1550 {F->getReturnType(), F->getArg(0)->getType()});
1554 F->getParent(), MaskedID,
1555 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1561 if (Name.consume_front(
"nvvm.")) {
1563 if (
F->arg_size() == 1) {
1566 .
Cases({
"brev32",
"brev64"}, Intrinsic::bitreverse)
1567 .Case(
"clz.i", Intrinsic::ctlz)
1568 .
Case(
"popc.i", Intrinsic::ctpop)
1572 {F->getReturnType()});
1575 }
else if (
F->arg_size() == 2) {
1578 .
Cases({
"max.s",
"max.i",
"max.ll"}, Intrinsic::smax)
1579 .Cases({
"min.s",
"min.i",
"min.ll"}, Intrinsic::smin)
1580 .Cases({
"max.us",
"max.ui",
"max.ull"}, Intrinsic::umax)
1581 .Cases({
"min.us",
"min.ui",
"min.ull"}, Intrinsic::umin)
1585 {F->getReturnType()});
1591 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1619 bool Expand =
false;
1620 if (Name.consume_front(
"abs."))
1623 Name ==
"i" || Name ==
"ll" || Name ==
"bf16" || Name ==
"bf16x2";
1624 else if (Name.consume_front(
"fabs."))
1626 Expand = Name ==
"f" || Name ==
"ftz.f" || Name ==
"d";
1627 else if (Name.consume_front(
"ex2.approx."))
1630 Name ==
"f" || Name ==
"ftz.f" || Name ==
"d" || Name ==
"f16x2";
1631 else if (Name.consume_front(
"atomic.load."))
1640 else if (Name.consume_front(
"bitcast."))
1643 Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" || Name ==
"d2ll";
1644 else if (Name.consume_front(
"rotate."))
1646 Expand = Name ==
"b32" || Name ==
"b64" || Name ==
"right.b64";
1647 else if (Name.consume_front(
"ptr.gen.to."))
1650 else if (Name.consume_front(
"ptr."))
1653 else if (Name.consume_front(
"ldg.global."))
1655 Expand = (Name.starts_with(
"i.") || Name.starts_with(
"f.") ||
1656 Name.starts_with(
"p."));
1659 .
Case(
"barrier0",
true)
1660 .
Case(
"barrier.n",
true)
1661 .
Case(
"barrier.sync.cnt",
true)
1662 .
Case(
"barrier.sync",
true)
1663 .
Case(
"barrier",
true)
1664 .
Case(
"bar.sync",
true)
1665 .
Case(
"barrier0.popc",
true)
1666 .
Case(
"barrier0.and",
true)
1667 .
Case(
"barrier0.or",
true)
1668 .
Case(
"clz.ll",
true)
1669 .
Case(
"popc.ll",
true)
1671 .
Case(
"swap.lo.hi.b64",
true)
1672 .
Case(
"tanh.approx.f32",
true)
1684 if (Name.starts_with(
"objectsize.")) {
1685 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1686 if (
F->arg_size() == 2 ||
F->arg_size() == 3) {
1689 Intrinsic::objectsize, Tys);
1696 if (Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1699 F->getParent(), Intrinsic::ptr_annotation,
1700 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1706 if (Name.consume_front(
"riscv.")) {
1709 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1710 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1711 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1712 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1715 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1728 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1729 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1738 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1739 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1740 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1741 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1746 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1755 if (Name ==
"clmul.i32" || Name ==
"clmul.i64") {
1757 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1766 if (Name ==
"stackprotectorcheck") {
1773 if (Name ==
"thread.pointer") {
1775 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
1781 if (Name ==
"var.annotation" &&
F->arg_size() == 4) {
1784 F->getParent(), Intrinsic::var_annotation,
1785 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1788 if (Name.consume_front(
"vector.splice")) {
1789 if (Name.starts_with(
".left") || Name.starts_with(
".right"))
1797 if (Name.consume_front(
"wasm.")) {
1800 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1801 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1802 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1807 F->getReturnType());
1811 if (Name.consume_front(
"dot.i8x16.i7x16.")) {
1813 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1815 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1834 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1843 auto *FT =
F->getFunctionType();
1846 std::string
Name =
F->getName().str();
1849 Name,
F->getParent());
1860 if (Result != std::nullopt) {
1873 bool CanUpgradeDebugIntrinsicsToRecords) {
1893 GV->
getName() ==
"llvm.global_dtors")) ||
1908 unsigned N =
Init->getNumOperands();
1909 std::vector<Constant *> NewCtors(
N);
1910 for (
unsigned i = 0; i !=
N; ++i) {
1913 Ctor->getAggregateElement(1),
1927 unsigned NumElts = ResultTy->getNumElements() * 8;
1931 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1941 for (
unsigned l = 0; l != NumElts; l += 16)
1942 for (
unsigned i = 0; i != 16; ++i) {
1943 unsigned Idx = NumElts + i - Shift;
1945 Idx -= NumElts - 16;
1946 Idxs[l + i] = Idx + l;
1949 Res = Builder.CreateShuffleVector(Res,
Op,
ArrayRef(Idxs, NumElts));
1953 return Builder.CreateBitCast(Res, ResultTy,
"cast");
1961 unsigned NumElts = ResultTy->getNumElements() * 8;
1965 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1975 for (
unsigned l = 0; l != NumElts; l += 16)
1976 for (
unsigned i = 0; i != 16; ++i) {
1977 unsigned Idx = i + Shift;
1979 Idx += NumElts - 16;
1980 Idxs[l + i] = Idx + l;
1983 Res = Builder.CreateShuffleVector(
Op, Res,
ArrayRef(Idxs, NumElts));
1987 return Builder.CreateBitCast(Res, ResultTy,
"cast");
1995 Mask = Builder.CreateBitCast(Mask, MaskTy);
2001 for (
unsigned i = 0; i != NumElts; ++i)
2003 Mask = Builder.CreateShuffleVector(Mask, Mask,
ArrayRef(Indices, NumElts),
2014 if (
C->isAllOnesValue())
2019 return Builder.CreateSelect(Mask, Op0, Op1);
2026 if (
C->isAllOnesValue())
2030 Mask->getType()->getIntegerBitWidth());
2031 Mask = Builder.CreateBitCast(Mask, MaskTy);
2032 Mask = Builder.CreateExtractElement(Mask, (
uint64_t)0);
2033 return Builder.CreateSelect(Mask, Op0, Op1);
2046 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
2047 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
2052 ShiftVal &= (NumElts - 1);
2061 if (ShiftVal > 16) {
2069 for (
unsigned l = 0; l < NumElts; l += 16) {
2070 for (
unsigned i = 0; i != 16; ++i) {
2071 unsigned Idx = ShiftVal + i;
2072 if (!IsVALIGN && Idx >= 16)
2073 Idx += NumElts - 16;
2074 Indices[l + i] = Idx + l;
2079 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
2085 bool ZeroMask,
bool IndexForm) {
2088 unsigned EltWidth = Ty->getScalarSizeInBits();
2089 bool IsFloat = Ty->isFPOrFPVectorTy();
2091 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2092 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2093 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2094 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2095 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2096 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2097 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2098 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2099 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2100 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2101 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2102 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2103 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2104 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2105 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2106 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2107 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2108 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2109 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2110 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2111 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2112 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2113 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2114 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2115 else if (VecWidth == 128 && EltWidth == 16)
2116 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2117 else if (VecWidth == 256 && EltWidth == 16)
2118 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2119 else if (VecWidth == 512 && EltWidth == 16)
2120 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2121 else if (VecWidth == 128 && EltWidth == 8)
2122 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2123 else if (VecWidth == 256 && EltWidth == 8)
2124 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2125 else if (VecWidth == 512 && EltWidth == 8)
2126 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2137 Value *V = Builder.CreateIntrinsic(IID, Args);
2149 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2160 bool IsRotateRight) {
2170 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2171 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2174 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2175 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2220 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2225 bool IsShiftRight,
bool ZeroMask) {
2239 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2240 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2243 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2244 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2259 const Align Alignment =
2261 ?
Align(
Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2266 if (
C->isAllOnesValue())
2267 return Builder.CreateAlignedStore(
Data, Ptr, Alignment);
2272 return Builder.CreateMaskedStore(
Data, Ptr, Alignment, Mask);
2278 const Align Alignment =
2287 if (
C->isAllOnesValue())
2288 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2293 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2299 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2300 {Op0, Builder.getInt1(
false)});
2315 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2316 LHS = Builder.CreateShl(
LHS, ShiftAmt);
2317 LHS = Builder.CreateAShr(
LHS, ShiftAmt);
2318 RHS = Builder.CreateShl(
RHS, ShiftAmt);
2319 RHS = Builder.CreateAShr(
RHS, ShiftAmt);
2322 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2323 LHS = Builder.CreateAnd(
LHS, Mask);
2324 RHS = Builder.CreateAnd(
RHS, Mask);
2341 if (!
C || !
C->isAllOnesValue())
2342 Vec = Builder.CreateAnd(Vec,
getX86MaskVec(Builder, Mask, NumElts));
2347 for (
unsigned i = 0; i != NumElts; ++i)
2349 for (
unsigned i = NumElts; i != 8; ++i)
2350 Indices[i] = NumElts + i % NumElts;
2351 Vec = Builder.CreateShuffleVector(Vec,
2355 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2359 unsigned CC,
bool Signed) {
2367 }
else if (CC == 7) {
2403 Value* AndNode = Builder.CreateAnd(Mask,
APInt(8, 1));
2404 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2406 Value* Extract2 = Builder.CreateExtractElement(Src, (
uint64_t)0);
2407 Value*
Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2416 return Builder.CreateSExt(Mask, ReturnOp,
"vpmovm2");
2422 Name = Name.substr(12);
2427 if (Name.starts_with(
"max.p")) {
2428 if (VecWidth == 128 && EltWidth == 32)
2429 IID = Intrinsic::x86_sse_max_ps;
2430 else if (VecWidth == 128 && EltWidth == 64)
2431 IID = Intrinsic::x86_sse2_max_pd;
2432 else if (VecWidth == 256 && EltWidth == 32)
2433 IID = Intrinsic::x86_avx_max_ps_256;
2434 else if (VecWidth == 256 && EltWidth == 64)
2435 IID = Intrinsic::x86_avx_max_pd_256;
2438 }
else if (Name.starts_with(
"min.p")) {
2439 if (VecWidth == 128 && EltWidth == 32)
2440 IID = Intrinsic::x86_sse_min_ps;
2441 else if (VecWidth == 128 && EltWidth == 64)
2442 IID = Intrinsic::x86_sse2_min_pd;
2443 else if (VecWidth == 256 && EltWidth == 32)
2444 IID = Intrinsic::x86_avx_min_ps_256;
2445 else if (VecWidth == 256 && EltWidth == 64)
2446 IID = Intrinsic::x86_avx_min_pd_256;
2449 }
else if (Name.starts_with(
"pshuf.b.")) {
2450 if (VecWidth == 128)
2451 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2452 else if (VecWidth == 256)
2453 IID = Intrinsic::x86_avx2_pshuf_b;
2454 else if (VecWidth == 512)
2455 IID = Intrinsic::x86_avx512_pshuf_b_512;
2458 }
else if (Name.starts_with(
"pmul.hr.sw.")) {
2459 if (VecWidth == 128)
2460 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2461 else if (VecWidth == 256)
2462 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2463 else if (VecWidth == 512)
2464 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2467 }
else if (Name.starts_with(
"pmulh.w.")) {
2468 if (VecWidth == 128)
2469 IID = Intrinsic::x86_sse2_pmulh_w;
2470 else if (VecWidth == 256)
2471 IID = Intrinsic::x86_avx2_pmulh_w;
2472 else if (VecWidth == 512)
2473 IID = Intrinsic::x86_avx512_pmulh_w_512;
2476 }
else if (Name.starts_with(
"pmulhu.w.")) {
2477 if (VecWidth == 128)
2478 IID = Intrinsic::x86_sse2_pmulhu_w;
2479 else if (VecWidth == 256)
2480 IID = Intrinsic::x86_avx2_pmulhu_w;
2481 else if (VecWidth == 512)
2482 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2485 }
else if (Name.starts_with(
"pmaddw.d.")) {
2486 if (VecWidth == 128)
2487 IID = Intrinsic::x86_sse2_pmadd_wd;
2488 else if (VecWidth == 256)
2489 IID = Intrinsic::x86_avx2_pmadd_wd;
2490 else if (VecWidth == 512)
2491 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2494 }
else if (Name.starts_with(
"pmaddubs.w.")) {
2495 if (VecWidth == 128)
2496 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2497 else if (VecWidth == 256)
2498 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2499 else if (VecWidth == 512)
2500 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2503 }
else if (Name.starts_with(
"packsswb.")) {
2504 if (VecWidth == 128)
2505 IID = Intrinsic::x86_sse2_packsswb_128;
2506 else if (VecWidth == 256)
2507 IID = Intrinsic::x86_avx2_packsswb;
2508 else if (VecWidth == 512)
2509 IID = Intrinsic::x86_avx512_packsswb_512;
2512 }
else if (Name.starts_with(
"packssdw.")) {
2513 if (VecWidth == 128)
2514 IID = Intrinsic::x86_sse2_packssdw_128;
2515 else if (VecWidth == 256)
2516 IID = Intrinsic::x86_avx2_packssdw;
2517 else if (VecWidth == 512)
2518 IID = Intrinsic::x86_avx512_packssdw_512;
2521 }
else if (Name.starts_with(
"packuswb.")) {
2522 if (VecWidth == 128)
2523 IID = Intrinsic::x86_sse2_packuswb_128;
2524 else if (VecWidth == 256)
2525 IID = Intrinsic::x86_avx2_packuswb;
2526 else if (VecWidth == 512)
2527 IID = Intrinsic::x86_avx512_packuswb_512;
2530 }
else if (Name.starts_with(
"packusdw.")) {
2531 if (VecWidth == 128)
2532 IID = Intrinsic::x86_sse41_packusdw;
2533 else if (VecWidth == 256)
2534 IID = Intrinsic::x86_avx2_packusdw;
2535 else if (VecWidth == 512)
2536 IID = Intrinsic::x86_avx512_packusdw_512;
2539 }
else if (Name.starts_with(
"vpermilvar.")) {
2540 if (VecWidth == 128 && EltWidth == 32)
2541 IID = Intrinsic::x86_avx_vpermilvar_ps;
2542 else if (VecWidth == 128 && EltWidth == 64)
2543 IID = Intrinsic::x86_avx_vpermilvar_pd;
2544 else if (VecWidth == 256 && EltWidth == 32)
2545 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2546 else if (VecWidth == 256 && EltWidth == 64)
2547 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2548 else if (VecWidth == 512 && EltWidth == 32)
2549 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2550 else if (VecWidth == 512 && EltWidth == 64)
2551 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2554 }
else if (Name ==
"cvtpd2dq.256") {
2555 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2556 }
else if (Name ==
"cvtpd2ps.256") {
2557 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2558 }
else if (Name ==
"cvttpd2dq.256") {
2559 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2560 }
else if (Name ==
"cvttps2dq.128") {
2561 IID = Intrinsic::x86_sse2_cvttps2dq;
2562 }
else if (Name ==
"cvttps2dq.256") {
2563 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2564 }
else if (Name.starts_with(
"permvar.")) {
2566 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2567 IID = Intrinsic::x86_avx2_permps;
2568 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2569 IID = Intrinsic::x86_avx2_permd;
2570 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2571 IID = Intrinsic::x86_avx512_permvar_df_256;
2572 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2573 IID = Intrinsic::x86_avx512_permvar_di_256;
2574 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2575 IID = Intrinsic::x86_avx512_permvar_sf_512;
2576 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2577 IID = Intrinsic::x86_avx512_permvar_si_512;
2578 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2579 IID = Intrinsic::x86_avx512_permvar_df_512;
2580 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2581 IID = Intrinsic::x86_avx512_permvar_di_512;
2582 else if (VecWidth == 128 && EltWidth == 16)
2583 IID = Intrinsic::x86_avx512_permvar_hi_128;
2584 else if (VecWidth == 256 && EltWidth == 16)
2585 IID = Intrinsic::x86_avx512_permvar_hi_256;
2586 else if (VecWidth == 512 && EltWidth == 16)
2587 IID = Intrinsic::x86_avx512_permvar_hi_512;
2588 else if (VecWidth == 128 && EltWidth == 8)
2589 IID = Intrinsic::x86_avx512_permvar_qi_128;
2590 else if (VecWidth == 256 && EltWidth == 8)
2591 IID = Intrinsic::x86_avx512_permvar_qi_256;
2592 else if (VecWidth == 512 && EltWidth == 8)
2593 IID = Intrinsic::x86_avx512_permvar_qi_512;
2596 }
else if (Name.starts_with(
"dbpsadbw.")) {
2597 if (VecWidth == 128)
2598 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2599 else if (VecWidth == 256)
2600 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2601 else if (VecWidth == 512)
2602 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2605 }
else if (Name.starts_with(
"pmultishift.qb.")) {
2606 if (VecWidth == 128)
2607 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2608 else if (VecWidth == 256)
2609 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2610 else if (VecWidth == 512)
2611 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2614 }
else if (Name.starts_with(
"conflict.")) {
2615 if (Name[9] ==
'd' && VecWidth == 128)
2616 IID = Intrinsic::x86_avx512_conflict_d_128;
2617 else if (Name[9] ==
'd' && VecWidth == 256)
2618 IID = Intrinsic::x86_avx512_conflict_d_256;
2619 else if (Name[9] ==
'd' && VecWidth == 512)
2620 IID = Intrinsic::x86_avx512_conflict_d_512;
2621 else if (Name[9] ==
'q' && VecWidth == 128)
2622 IID = Intrinsic::x86_avx512_conflict_q_128;
2623 else if (Name[9] ==
'q' && VecWidth == 256)
2624 IID = Intrinsic::x86_avx512_conflict_q_256;
2625 else if (Name[9] ==
'q' && VecWidth == 512)
2626 IID = Intrinsic::x86_avx512_conflict_q_512;
2629 }
else if (Name.starts_with(
"pavg.")) {
2630 if (Name[5] ==
'b' && VecWidth == 128)
2631 IID = Intrinsic::x86_sse2_pavg_b;
2632 else if (Name[5] ==
'b' && VecWidth == 256)
2633 IID = Intrinsic::x86_avx2_pavg_b;
2634 else if (Name[5] ==
'b' && VecWidth == 512)
2635 IID = Intrinsic::x86_avx512_pavg_b_512;
2636 else if (Name[5] ==
'w' && VecWidth == 128)
2637 IID = Intrinsic::x86_sse2_pavg_w;
2638 else if (Name[5] ==
'w' && VecWidth == 256)
2639 IID = Intrinsic::x86_avx2_pavg_w;
2640 else if (Name[5] ==
'w' && VecWidth == 512)
2641 IID = Intrinsic::x86_avx512_pavg_w_512;
2650 Rep = Builder.CreateIntrinsic(IID, Args);
2661 if (AsmStr->find(
"mov\tfp") == 0 &&
2662 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2663 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2664 AsmStr->replace(Pos, 1,
";");
2670 Value *Rep =
nullptr;
2672 if (Name ==
"abs.i" || Name ==
"abs.ll") {
2674 Value *Neg = Builder.CreateNeg(Arg,
"neg");
2675 Value *Cmp = Builder.CreateICmpSGE(
2677 Rep = Builder.CreateSelect(Cmp, Arg, Neg,
"abs");
2678 }
else if (Name ==
"abs.bf16" || Name ==
"abs.bf16x2") {
2679 Type *Ty = (Name ==
"abs.bf16")
2683 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2684 Rep = Builder.CreateBitCast(Abs, CI->
getType());
2685 }
else if (Name ==
"fabs.f" || Name ==
"fabs.ftz.f" || Name ==
"fabs.d") {
2686 Intrinsic::ID IID = (Name ==
"fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2687 : Intrinsic::nvvm_fabs;
2688 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2689 }
else if (Name.consume_front(
"ex2.approx.")) {
2691 Intrinsic::ID IID = Name.starts_with(
"ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2692 : Intrinsic::nvvm_ex2_approx;
2693 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2694 }
else if (Name.starts_with(
"atomic.load.add.f32.p") ||
2695 Name.starts_with(
"atomic.load.add.f64.p")) {
2700 }
else if (Name.starts_with(
"atomic.load.inc.32.p") ||
2701 Name.starts_with(
"atomic.load.dec.32.p")) {
2706 Rep = Builder.CreateAtomicRMW(
Op, Ptr, Val,
MaybeAlign(),
2708 }
else if (Name ==
"clz.ll") {
2711 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->
getType()},
2712 {Arg, Builder.getFalse()},
2714 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(),
"ctlz.trunc");
2715 }
else if (Name ==
"popc.ll") {
2719 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->
getType()},
2720 Arg,
nullptr,
"ctpop");
2721 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(),
"ctpop.trunc");
2722 }
else if (Name ==
"h2f") {
2724 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
2725 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2726 }
else if (Name.consume_front(
"bitcast.") &&
2727 (Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" ||
2730 }
else if (Name ==
"rotate.b32") {
2733 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2734 {Arg, Arg, ShiftAmt});
2735 }
else if (Name ==
"rotate.b64") {
2739 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2740 {Arg, Arg, ZExtShiftAmt});
2741 }
else if (Name ==
"rotate.right.b64") {
2745 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2746 {Arg, Arg, ZExtShiftAmt});
2747 }
else if (Name ==
"swap.lo.hi.b64") {
2750 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2751 {Arg, Arg, Builder.getInt64(32)});
2752 }
else if ((Name.consume_front(
"ptr.gen.to.") &&
2755 Name.starts_with(
".to.gen"))) {
2757 }
else if (Name.consume_front(
"ldg.global")) {
2761 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2764 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2766 }
else if (Name ==
"tanh.approx.f32") {
2770 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->
getArgOperand(0),
2772 }
else if (Name ==
"barrier0" || Name ==
"barrier.n" || Name ==
"bar.sync") {
2774 Name.ends_with(
'0') ? Builder.getInt32(0) : CI->
getArgOperand(0);
2775 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2777 }
else if (Name ==
"barrier") {
2778 Rep = Builder.CreateIntrinsic(
2779 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2781 }
else if (Name ==
"barrier.sync") {
2782 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2784 }
else if (Name ==
"barrier.sync.cnt") {
2785 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2787 }
else if (Name ==
"barrier0.popc" || Name ==
"barrier0.and" ||
2788 Name ==
"barrier0.or") {
2790 C = Builder.CreateICmpNE(
C, Builder.getInt32(0));
2794 .
Case(
"barrier0.popc",
2795 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2796 .
Case(
"barrier0.and",
2797 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2798 .
Case(
"barrier0.or",
2799 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2800 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0),
C});
2801 Rep = Builder.CreateZExt(Bar, CI->
getType());
2805 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2815 ? Builder.CreateBitCast(Arg, NewType)
2818 Rep = Builder.CreateCall(NewFn, Args);
2819 if (
F->getReturnType()->isIntegerTy())
2820 Rep = Builder.CreateBitCast(Rep,
F->getReturnType());
2830 Value *Rep =
nullptr;
2832 if (Name.starts_with(
"sse4a.movnt.")) {
2844 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2847 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2848 }
else if (Name.starts_with(
"avx.movnt.") ||
2849 Name.starts_with(
"avx512.storent.")) {
2861 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2862 }
else if (Name ==
"sse2.storel.dq") {
2867 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2868 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2869 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2870 }
else if (Name.starts_with(
"sse.storeu.") ||
2871 Name.starts_with(
"sse2.storeu.") ||
2872 Name.starts_with(
"avx.storeu.")) {
2875 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
2876 }
else if (Name ==
"avx512.mask.store.ss") {
2880 }
else if (Name.starts_with(
"avx512.mask.store")) {
2882 bool Aligned = Name[17] !=
'u';
2885 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
2888 bool CmpEq = Name[9] ==
'e';
2891 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
2892 }
else if (Name.starts_with(
"avx512.broadcastm")) {
2899 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2900 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
2902 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
2903 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
2904 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
2905 }
else if (Name.starts_with(
"avx.sqrt.p") ||
2906 Name.starts_with(
"sse2.sqrt.p") ||
2907 Name.starts_with(
"sse.sqrt.p")) {
2908 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2909 {CI->getArgOperand(0)});
2910 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
2914 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
2915 : Intrinsic::x86_avx512_sqrt_pd_512;
2918 Rep = Builder.CreateIntrinsic(IID, Args);
2920 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2921 {CI->getArgOperand(0)});
2925 }
else if (Name.starts_with(
"avx512.ptestm") ||
2926 Name.starts_with(
"avx512.ptestnm")) {
2930 Rep = Builder.CreateAnd(Op0, Op1);
2936 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2938 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
2941 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
2944 }
else if (Name.starts_with(
"avx512.kunpck")) {
2949 for (
unsigned i = 0; i != NumElts; ++i)
2958 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
2959 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2960 }
else if (Name ==
"avx512.kand.w") {
2963 Rep = Builder.CreateAnd(
LHS,
RHS);
2964 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2965 }
else if (Name ==
"avx512.kandn.w") {
2968 LHS = Builder.CreateNot(
LHS);
2969 Rep = Builder.CreateAnd(
LHS,
RHS);
2970 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2971 }
else if (Name ==
"avx512.kor.w") {
2974 Rep = Builder.CreateOr(
LHS,
RHS);
2975 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2976 }
else if (Name ==
"avx512.kxor.w") {
2979 Rep = Builder.CreateXor(
LHS,
RHS);
2980 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2981 }
else if (Name ==
"avx512.kxnor.w") {
2984 LHS = Builder.CreateNot(
LHS);
2985 Rep = Builder.CreateXor(
LHS,
RHS);
2986 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2987 }
else if (Name ==
"avx512.knot.w") {
2989 Rep = Builder.CreateNot(Rep);
2990 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2991 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
2994 Rep = Builder.CreateOr(
LHS,
RHS);
2995 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2997 if (Name[14] ==
'c')
3001 Rep = Builder.CreateICmpEQ(Rep,
C);
3002 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3003 }
else if (Name ==
"sse.add.ss" || Name ==
"sse2.add.sd" ||
3004 Name ==
"sse.sub.ss" || Name ==
"sse2.sub.sd" ||
3005 Name ==
"sse.mul.ss" || Name ==
"sse2.mul.sd" ||
3006 Name ==
"sse.div.ss" || Name ==
"sse2.div.sd") {
3009 ConstantInt::get(I32Ty, 0));
3011 ConstantInt::get(I32Ty, 0));
3013 if (Name.contains(
".add."))
3014 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3015 else if (Name.contains(
".sub."))
3016 EltOp = Builder.CreateFSub(Elt0, Elt1);
3017 else if (Name.contains(
".mul."))
3018 EltOp = Builder.CreateFMul(Elt0, Elt1);
3020 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3021 Rep = Builder.CreateInsertElement(CI->
getArgOperand(0), EltOp,
3022 ConstantInt::get(I32Ty, 0));
3023 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
3025 bool CmpEq = Name[16] ==
'e';
3027 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
3035 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3038 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3041 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3048 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
3053 if (VecWidth == 128 && EltWidth == 32)
3054 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3055 else if (VecWidth == 256 && EltWidth == 32)
3056 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3057 else if (VecWidth == 512 && EltWidth == 32)
3058 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3059 else if (VecWidth == 128 && EltWidth == 64)
3060 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3061 else if (VecWidth == 256 && EltWidth == 64)
3062 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3063 else if (VecWidth == 512 && EltWidth == 64)
3064 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3071 }
else if (Name.starts_with(
"avx512.cmp.p")) {
3073 Type *OpTy = Args[0]->getType();
3077 if (VecWidth == 128 && EltWidth == 32)
3078 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3079 else if (VecWidth == 256 && EltWidth == 32)
3080 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3081 else if (VecWidth == 512 && EltWidth == 32)
3082 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3083 else if (VecWidth == 128 && EltWidth == 64)
3084 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3085 else if (VecWidth == 256 && EltWidth == 64)
3086 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3087 else if (VecWidth == 512 && EltWidth == 64)
3088 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3093 if (VecWidth == 512)
3095 Args.push_back(Mask);
3097 Rep = Builder.CreateIntrinsic(IID, Args);
3098 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
3102 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
3105 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
3106 Name.starts_with(
"avx512.cvtw2mask.") ||
3107 Name.starts_with(
"avx512.cvtd2mask.") ||
3108 Name.starts_with(
"avx512.cvtq2mask.")) {
3113 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
3114 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
3115 Name.starts_with(
"avx512.mask.pabs")) {
3117 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
3118 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
3119 Name.starts_with(
"avx512.mask.pmaxs")) {
3121 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
3122 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
3123 Name.starts_with(
"avx512.mask.pmaxu")) {
3125 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
3126 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
3127 Name.starts_with(
"avx512.mask.pmins")) {
3129 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
3130 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
3131 Name.starts_with(
"avx512.mask.pminu")) {
3133 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
3134 Name ==
"avx512.pmulu.dq.512" ||
3135 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
3137 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
3138 Name ==
"avx512.pmul.dq.512" ||
3139 Name.starts_with(
"avx512.mask.pmul.dq.")) {
3141 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
3142 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
3147 }
else if (Name ==
"avx512.cvtusi2sd") {
3152 }
else if (Name ==
"sse2.cvtss2sd") {
3154 Rep = Builder.CreateFPExt(
3157 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
3158 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
3159 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
3160 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
3161 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
3162 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
3163 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
3164 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
3165 Name ==
"avx512.mask.cvtqq2ps.256" ||
3166 Name ==
"avx512.mask.cvtqq2ps.512" ||
3167 Name ==
"avx512.mask.cvtuqq2ps.256" ||
3168 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
3169 Name ==
"avx.cvt.ps2.pd.256" ||
3170 Name ==
"avx512.mask.cvtps2pd.128" ||
3171 Name ==
"avx512.mask.cvtps2pd.256") {
3176 unsigned NumDstElts = DstTy->getNumElements();
3178 assert(NumDstElts == 2 &&
"Unexpected vector size");
3179 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
3182 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3183 bool IsUnsigned = Name.contains(
"cvtu");
3185 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
3189 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3190 : Intrinsic::x86_avx512_sitofp_round;
3191 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3194 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
3195 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3201 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3202 Name.starts_with(
"vcvtph2ps.")) {
3206 unsigned NumDstElts = DstTy->getNumElements();
3207 if (NumDstElts != SrcTy->getNumElements()) {
3208 assert(NumDstElts == 4 &&
"Unexpected vector size");
3209 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3211 Rep = Builder.CreateBitCast(
3213 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3217 }
else if (Name.starts_with(
"avx512.mask.load")) {
3219 bool Aligned = Name[16] !=
'u';
3222 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3225 ResultTy->getNumElements());
3227 Rep = Builder.CreateIntrinsic(
3228 Intrinsic::masked_expandload, ResultTy,
3230 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3236 Rep = Builder.CreateIntrinsic(
3237 Intrinsic::masked_compressstore, ResultTy,
3239 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3240 Name.starts_with(
"avx512.mask.expand.")) {
3244 ResultTy->getNumElements());
3246 bool IsCompress = Name[12] ==
'c';
3247 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3248 : Intrinsic::x86_avx512_mask_expand;
3249 Rep = Builder.CreateIntrinsic(
3251 }
else if (Name.starts_with(
"xop.vpcom")) {
3253 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3254 Name.ends_with(
"uq"))
3256 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3257 Name.ends_with(
"d") || Name.ends_with(
"q"))
3266 Name = Name.substr(9);
3267 if (Name.starts_with(
"lt"))
3269 else if (Name.starts_with(
"le"))
3271 else if (Name.starts_with(
"gt"))
3273 else if (Name.starts_with(
"ge"))
3275 else if (Name.starts_with(
"eq"))
3277 else if (Name.starts_with(
"ne"))
3279 else if (Name.starts_with(
"false"))
3281 else if (Name.starts_with(
"true"))
3288 }
else if (Name.starts_with(
"xop.vpcmov")) {
3290 Value *NotSel = Builder.CreateNot(Sel);
3293 Rep = Builder.CreateOr(Sel0, Sel1);
3294 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3295 Name.starts_with(
"avx512.mask.prol")) {
3297 }
else if (Name.starts_with(
"avx512.pror") ||
3298 Name.starts_with(
"avx512.mask.pror")) {
3300 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3301 Name.starts_with(
"avx512.mask.vpshld") ||
3302 Name.starts_with(
"avx512.maskz.vpshld")) {
3303 bool ZeroMask = Name[11] ==
'z';
3305 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3306 Name.starts_with(
"avx512.mask.vpshrd") ||
3307 Name.starts_with(
"avx512.maskz.vpshrd")) {
3308 bool ZeroMask = Name[11] ==
'z';
3310 }
else if (Name ==
"sse42.crc32.64.8") {
3313 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3315 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3316 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3317 Name.starts_with(
"avx512.vbroadcast.s")) {
3320 Type *EltTy = VecTy->getElementType();
3321 unsigned EltNum = VecTy->getNumElements();
3325 for (
unsigned I = 0;
I < EltNum; ++
I)
3326 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3327 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3328 Name.starts_with(
"sse41.pmovzx") ||
3329 Name.starts_with(
"avx2.pmovsx") ||
3330 Name.starts_with(
"avx2.pmovzx") ||
3331 Name.starts_with(
"avx512.mask.pmovsx") ||
3332 Name.starts_with(
"avx512.mask.pmovzx")) {
3334 unsigned NumDstElts = DstTy->getNumElements();
3338 for (
unsigned i = 0; i != NumDstElts; ++i)
3343 bool DoSext = Name.contains(
"pmovsx");
3345 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3350 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3351 Name ==
"avx512.mask.pmov.qd.512" ||
3352 Name ==
"avx512.mask.pmov.wb.256" ||
3353 Name ==
"avx512.mask.pmov.wb.512") {
3358 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3359 Name ==
"avx2.vbroadcasti128") {
3365 if (NumSrcElts == 2)
3366 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3368 Rep = Builder.CreateShuffleVector(Load,
3370 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3371 Name.starts_with(
"avx512.mask.shuf.f")) {
3376 unsigned ControlBitsMask = NumLanes - 1;
3377 unsigned NumControlBits = NumLanes / 2;
3380 for (
unsigned l = 0; l != NumLanes; ++l) {
3381 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3383 if (l >= NumLanes / 2)
3384 LaneMask += NumLanes;
3385 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3386 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3392 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3393 Name.starts_with(
"avx512.mask.broadcasti")) {
3396 unsigned NumDstElts =
3400 for (
unsigned i = 0; i != NumDstElts; ++i)
3401 ShuffleMask[i] = i % NumSrcElts;
3407 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3408 Name.starts_with(
"avx2.vbroadcast") ||
3409 Name.starts_with(
"avx512.pbroadcast") ||
3410 Name.starts_with(
"avx512.mask.broadcast.s")) {
3417 Rep = Builder.CreateShuffleVector(
Op, M);
3422 }
else if (Name.starts_with(
"sse2.padds.") ||
3423 Name.starts_with(
"avx2.padds.") ||
3424 Name.starts_with(
"avx512.padds.") ||
3425 Name.starts_with(
"avx512.mask.padds.")) {
3427 }
else if (Name.starts_with(
"sse2.psubs.") ||
3428 Name.starts_with(
"avx2.psubs.") ||
3429 Name.starts_with(
"avx512.psubs.") ||
3430 Name.starts_with(
"avx512.mask.psubs.")) {
3432 }
else if (Name.starts_with(
"sse2.paddus.") ||
3433 Name.starts_with(
"avx2.paddus.") ||
3434 Name.starts_with(
"avx512.mask.paddus.")) {
3436 }
else if (Name.starts_with(
"sse2.psubus.") ||
3437 Name.starts_with(
"avx2.psubus.") ||
3438 Name.starts_with(
"avx512.mask.psubus.")) {
3440 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3445 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3449 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3454 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3459 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3460 Name ==
"avx512.psll.dq.512") {
3464 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3465 Name ==
"avx512.psrl.dq.512") {
3469 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3470 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3471 Name.starts_with(
"avx2.pblendd.")) {
3476 unsigned NumElts = VecTy->getNumElements();
3479 for (
unsigned i = 0; i != NumElts; ++i)
3480 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3482 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3483 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3484 Name ==
"avx2.vinserti128" ||
3485 Name.starts_with(
"avx512.mask.insert")) {
3489 unsigned DstNumElts =
3491 unsigned SrcNumElts =
3493 unsigned Scale = DstNumElts / SrcNumElts;
3500 for (
unsigned i = 0; i != SrcNumElts; ++i)
3502 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3503 Idxs[i] = SrcNumElts;
3504 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3518 for (
unsigned i = 0; i != DstNumElts; ++i)
3521 for (
unsigned i = 0; i != SrcNumElts; ++i)
3522 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3523 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3529 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3530 Name ==
"avx2.vextracti128" ||
3531 Name.starts_with(
"avx512.mask.vextract")) {
3534 unsigned DstNumElts =
3536 unsigned SrcNumElts =
3538 unsigned Scale = SrcNumElts / DstNumElts;
3545 for (
unsigned i = 0; i != DstNumElts; ++i) {
3546 Idxs[i] = i + (Imm * DstNumElts);
3548 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3554 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3555 Name.starts_with(
"avx512.mask.perm.di.")) {
3559 unsigned NumElts = VecTy->getNumElements();
3562 for (
unsigned i = 0; i != NumElts; ++i)
3563 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3565 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3570 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3582 unsigned HalfSize = NumElts / 2;
3594 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3595 for (
unsigned i = 0; i < HalfSize; ++i)
3596 ShuffleMask[i] = StartIndex + i;
3599 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3600 for (
unsigned i = 0; i < HalfSize; ++i)
3601 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3603 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3605 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3606 Name.starts_with(
"avx512.mask.vpermil.p") ||
3607 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3611 unsigned NumElts = VecTy->getNumElements();
3613 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3614 unsigned IdxMask = ((1 << IdxSize) - 1);
3620 for (
unsigned i = 0; i != NumElts; ++i)
3621 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3623 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3628 }
else if (Name ==
"sse2.pshufl.w" ||
3629 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3635 for (
unsigned l = 0; l != NumElts; l += 8) {
3636 for (
unsigned i = 0; i != 4; ++i)
3637 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3638 for (
unsigned i = 4; i != 8; ++i)
3639 Idxs[i + l] = i + l;
3642 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3647 }
else if (Name ==
"sse2.pshufh.w" ||
3648 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3654 for (
unsigned l = 0; l != NumElts; l += 8) {
3655 for (
unsigned i = 0; i != 4; ++i)
3656 Idxs[i + l] = i + l;
3657 for (
unsigned i = 0; i != 4; ++i)
3658 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3661 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3666 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3673 unsigned HalfLaneElts = NumLaneElts / 2;
3676 for (
unsigned i = 0; i != NumElts; ++i) {
3678 Idxs[i] = i - (i % NumLaneElts);
3680 if ((i % NumLaneElts) >= HalfLaneElts)
3684 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3687 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3691 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3692 Name.starts_with(
"avx512.mask.movshdup") ||
3693 Name.starts_with(
"avx512.mask.movsldup")) {
3699 if (Name.starts_with(
"avx512.mask.movshdup."))
3703 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3704 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3705 Idxs[i + l + 0] = i + l +
Offset;
3706 Idxs[i + l + 1] = i + l +
Offset;
3709 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3713 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3714 Name.starts_with(
"avx512.mask.unpckl.")) {
3721 for (
int l = 0; l != NumElts; l += NumLaneElts)
3722 for (
int i = 0; i != NumLaneElts; ++i)
3723 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3725 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3729 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3730 Name.starts_with(
"avx512.mask.unpckh.")) {
3737 for (
int l = 0; l != NumElts; l += NumLaneElts)
3738 for (
int i = 0; i != NumLaneElts; ++i)
3739 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3741 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3745 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3746 Name.starts_with(
"avx512.mask.pand.")) {
3749 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3751 Rep = Builder.CreateBitCast(Rep, FTy);
3754 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3755 Name.starts_with(
"avx512.mask.pandn.")) {
3758 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3759 Rep = Builder.CreateAnd(Rep,
3761 Rep = Builder.CreateBitCast(Rep, FTy);
3764 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3765 Name.starts_with(
"avx512.mask.por.")) {
3768 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3770 Rep = Builder.CreateBitCast(Rep, FTy);
3773 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3774 Name.starts_with(
"avx512.mask.pxor.")) {
3777 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3779 Rep = Builder.CreateBitCast(Rep, FTy);
3782 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3786 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3790 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3794 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3795 if (Name.ends_with(
".512")) {
3797 if (Name[17] ==
's')
3798 IID = Intrinsic::x86_avx512_add_ps_512;
3800 IID = Intrinsic::x86_avx512_add_pd_512;
3802 Rep = Builder.CreateIntrinsic(
3810 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3811 if (Name.ends_with(
".512")) {
3813 if (Name[17] ==
's')
3814 IID = Intrinsic::x86_avx512_div_ps_512;
3816 IID = Intrinsic::x86_avx512_div_pd_512;
3818 Rep = Builder.CreateIntrinsic(
3826 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3827 if (Name.ends_with(
".512")) {
3829 if (Name[17] ==
's')
3830 IID = Intrinsic::x86_avx512_mul_ps_512;
3832 IID = Intrinsic::x86_avx512_mul_pd_512;
3834 Rep = Builder.CreateIntrinsic(
3842 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3843 if (Name.ends_with(
".512")) {
3845 if (Name[17] ==
's')
3846 IID = Intrinsic::x86_avx512_sub_ps_512;
3848 IID = Intrinsic::x86_avx512_sub_pd_512;
3850 Rep = Builder.CreateIntrinsic(
3858 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3859 Name.starts_with(
"avx512.mask.min.p")) &&
3860 Name.drop_front(18) ==
".512") {
3861 bool IsDouble = Name[17] ==
'd';
3862 bool IsMin = Name[13] ==
'i';
3864 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3865 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3868 Rep = Builder.CreateIntrinsic(
3873 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
3875 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
3876 {CI->getArgOperand(0), Builder.getInt1(false)});
3879 }
else if (Name.starts_with(
"avx512.mask.psll")) {
3880 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3881 bool IsVariable = Name[16] ==
'v';
3882 char Size = Name[16] ==
'.' ? Name[17]
3883 : Name[17] ==
'.' ? Name[18]
3884 : Name[18] ==
'.' ? Name[19]
3888 if (IsVariable && Name[17] !=
'.') {
3889 if (
Size ==
'd' && Name[17] ==
'2')
3890 IID = Intrinsic::x86_avx2_psllv_q;
3891 else if (
Size ==
'd' && Name[17] ==
'4')
3892 IID = Intrinsic::x86_avx2_psllv_q_256;
3893 else if (
Size ==
's' && Name[17] ==
'4')
3894 IID = Intrinsic::x86_avx2_psllv_d;
3895 else if (
Size ==
's' && Name[17] ==
'8')
3896 IID = Intrinsic::x86_avx2_psllv_d_256;
3897 else if (
Size ==
'h' && Name[17] ==
'8')
3898 IID = Intrinsic::x86_avx512_psllv_w_128;
3899 else if (
Size ==
'h' && Name[17] ==
'1')
3900 IID = Intrinsic::x86_avx512_psllv_w_256;
3901 else if (Name[17] ==
'3' && Name[18] ==
'2')
3902 IID = Intrinsic::x86_avx512_psllv_w_512;
3905 }
else if (Name.ends_with(
".128")) {
3907 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3908 : Intrinsic::x86_sse2_psll_d;
3909 else if (
Size ==
'q')
3910 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3911 : Intrinsic::x86_sse2_psll_q;
3912 else if (
Size ==
'w')
3913 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3914 : Intrinsic::x86_sse2_psll_w;
3917 }
else if (Name.ends_with(
".256")) {
3919 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3920 : Intrinsic::x86_avx2_psll_d;
3921 else if (
Size ==
'q')
3922 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3923 : Intrinsic::x86_avx2_psll_q;
3924 else if (
Size ==
'w')
3925 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3926 : Intrinsic::x86_avx2_psll_w;
3931 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3932 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3933 : Intrinsic::x86_avx512_psll_d_512;
3934 else if (
Size ==
'q')
3935 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3936 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3937 : Intrinsic::x86_avx512_psll_q_512;
3938 else if (
Size ==
'w')
3939 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3940 : Intrinsic::x86_avx512_psll_w_512;
3946 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
3947 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3948 bool IsVariable = Name[16] ==
'v';
3949 char Size = Name[16] ==
'.' ? Name[17]
3950 : Name[17] ==
'.' ? Name[18]
3951 : Name[18] ==
'.' ? Name[19]
3955 if (IsVariable && Name[17] !=
'.') {
3956 if (
Size ==
'd' && Name[17] ==
'2')
3957 IID = Intrinsic::x86_avx2_psrlv_q;
3958 else if (
Size ==
'd' && Name[17] ==
'4')
3959 IID = Intrinsic::x86_avx2_psrlv_q_256;
3960 else if (
Size ==
's' && Name[17] ==
'4')
3961 IID = Intrinsic::x86_avx2_psrlv_d;
3962 else if (
Size ==
's' && Name[17] ==
'8')
3963 IID = Intrinsic::x86_avx2_psrlv_d_256;
3964 else if (
Size ==
'h' && Name[17] ==
'8')
3965 IID = Intrinsic::x86_avx512_psrlv_w_128;
3966 else if (
Size ==
'h' && Name[17] ==
'1')
3967 IID = Intrinsic::x86_avx512_psrlv_w_256;
3968 else if (Name[17] ==
'3' && Name[18] ==
'2')
3969 IID = Intrinsic::x86_avx512_psrlv_w_512;
3972 }
else if (Name.ends_with(
".128")) {
3974 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3975 : Intrinsic::x86_sse2_psrl_d;
3976 else if (
Size ==
'q')
3977 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3978 : Intrinsic::x86_sse2_psrl_q;
3979 else if (
Size ==
'w')
3980 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3981 : Intrinsic::x86_sse2_psrl_w;
3984 }
else if (Name.ends_with(
".256")) {
3986 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3987 : Intrinsic::x86_avx2_psrl_d;
3988 else if (
Size ==
'q')
3989 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3990 : Intrinsic::x86_avx2_psrl_q;
3991 else if (
Size ==
'w')
3992 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3993 : Intrinsic::x86_avx2_psrl_w;
3998 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3999 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4000 : Intrinsic::x86_avx512_psrl_d_512;
4001 else if (
Size ==
'q')
4002 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4003 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4004 : Intrinsic::x86_avx512_psrl_q_512;
4005 else if (
Size ==
'w')
4006 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4007 : Intrinsic::x86_avx512_psrl_w_512;
4013 }
else if (Name.starts_with(
"avx512.mask.psra")) {
4014 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4015 bool IsVariable = Name[16] ==
'v';
4016 char Size = Name[16] ==
'.' ? Name[17]
4017 : Name[17] ==
'.' ? Name[18]
4018 : Name[18] ==
'.' ? Name[19]
4022 if (IsVariable && Name[17] !=
'.') {
4023 if (
Size ==
's' && Name[17] ==
'4')
4024 IID = Intrinsic::x86_avx2_psrav_d;
4025 else if (
Size ==
's' && Name[17] ==
'8')
4026 IID = Intrinsic::x86_avx2_psrav_d_256;
4027 else if (
Size ==
'h' && Name[17] ==
'8')
4028 IID = Intrinsic::x86_avx512_psrav_w_128;
4029 else if (
Size ==
'h' && Name[17] ==
'1')
4030 IID = Intrinsic::x86_avx512_psrav_w_256;
4031 else if (Name[17] ==
'3' && Name[18] ==
'2')
4032 IID = Intrinsic::x86_avx512_psrav_w_512;
4035 }
else if (Name.ends_with(
".128")) {
4037 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4038 : Intrinsic::x86_sse2_psra_d;
4039 else if (
Size ==
'q')
4040 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4041 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4042 : Intrinsic::x86_avx512_psra_q_128;
4043 else if (
Size ==
'w')
4044 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4045 : Intrinsic::x86_sse2_psra_w;
4048 }
else if (Name.ends_with(
".256")) {
4050 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4051 : Intrinsic::x86_avx2_psra_d;
4052 else if (
Size ==
'q')
4053 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4054 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4055 : Intrinsic::x86_avx512_psra_q_256;
4056 else if (
Size ==
'w')
4057 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4058 : Intrinsic::x86_avx2_psra_w;
4063 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4064 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4065 : Intrinsic::x86_avx512_psra_d_512;
4066 else if (
Size ==
'q')
4067 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4068 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4069 : Intrinsic::x86_avx512_psra_q_512;
4070 else if (
Size ==
'w')
4071 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4072 : Intrinsic::x86_avx512_psra_w_512;
4078 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
4080 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
4082 }
else if (Name.ends_with(
".movntdqa")) {
4086 LoadInst *LI = Builder.CreateAlignedLoad(
4091 }
else if (Name.starts_with(
"fma.vfmadd.") ||
4092 Name.starts_with(
"fma.vfmsub.") ||
4093 Name.starts_with(
"fma.vfnmadd.") ||
4094 Name.starts_with(
"fma.vfnmsub.")) {
4095 bool NegMul = Name[6] ==
'n';
4096 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
4097 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
4108 if (NegMul && !IsScalar)
4109 Ops[0] = Builder.CreateFNeg(
Ops[0]);
4110 if (NegMul && IsScalar)
4111 Ops[1] = Builder.CreateFNeg(
Ops[1]);
4113 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4115 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4119 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
4127 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4131 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
4132 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
4133 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
4134 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
4135 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
4136 bool IsMask3 = Name[11] ==
'3';
4137 bool IsMaskZ = Name[11] ==
'z';
4139 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4140 bool NegMul = Name[2] ==
'n';
4141 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4147 if (NegMul && (IsMask3 || IsMaskZ))
4148 A = Builder.CreateFNeg(
A);
4149 if (NegMul && !(IsMask3 || IsMaskZ))
4150 B = Builder.CreateFNeg(
B);
4152 C = Builder.CreateFNeg(
C);
4154 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
4155 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
4156 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
4163 if (Name.back() ==
'd')
4164 IID = Intrinsic::x86_avx512_vfmadd_f64;
4166 IID = Intrinsic::x86_avx512_vfmadd_f32;
4167 Rep = Builder.CreateIntrinsic(IID,
Ops);
4169 Rep = Builder.CreateFMA(
A,
B,
C);
4178 if (NegAcc && IsMask3)
4183 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
4185 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
4186 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
4187 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
4188 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
4189 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
4190 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
4191 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
4192 bool IsMask3 = Name[11] ==
'3';
4193 bool IsMaskZ = Name[11] ==
'z';
4195 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4196 bool NegMul = Name[2] ==
'n';
4197 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4203 if (NegMul && (IsMask3 || IsMaskZ))
4204 A = Builder.CreateFNeg(
A);
4205 if (NegMul && !(IsMask3 || IsMaskZ))
4206 B = Builder.CreateFNeg(
B);
4208 C = Builder.CreateFNeg(
C);
4215 if (Name[Name.size() - 5] ==
's')
4216 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4218 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4222 Rep = Builder.CreateFMA(
A,
B,
C);
4230 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4234 if (VecWidth == 128 && EltWidth == 32)
4235 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4236 else if (VecWidth == 256 && EltWidth == 32)
4237 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4238 else if (VecWidth == 128 && EltWidth == 64)
4239 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4240 else if (VecWidth == 256 && EltWidth == 64)
4241 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4247 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4248 Rep = Builder.CreateIntrinsic(IID,
Ops);
4249 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4250 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4251 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4252 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4253 bool IsMask3 = Name[11] ==
'3';
4254 bool IsMaskZ = Name[11] ==
'z';
4256 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4257 bool IsSubAdd = Name[3] ==
's';
4261 if (Name[Name.size() - 5] ==
's')
4262 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4264 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4269 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4271 Rep = Builder.CreateIntrinsic(IID,
Ops);
4280 Value *Odd = Builder.CreateCall(FMA,
Ops);
4281 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4282 Value *Even = Builder.CreateCall(FMA,
Ops);
4288 for (
int i = 0; i != NumElts; ++i)
4289 Idxs[i] = i + (i % 2) * NumElts;
4291 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4299 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4300 Name.starts_with(
"avx512.maskz.pternlog.")) {
4301 bool ZeroMask = Name[11] ==
'z';
4305 if (VecWidth == 128 && EltWidth == 32)
4306 IID = Intrinsic::x86_avx512_pternlog_d_128;
4307 else if (VecWidth == 256 && EltWidth == 32)
4308 IID = Intrinsic::x86_avx512_pternlog_d_256;
4309 else if (VecWidth == 512 && EltWidth == 32)
4310 IID = Intrinsic::x86_avx512_pternlog_d_512;
4311 else if (VecWidth == 128 && EltWidth == 64)
4312 IID = Intrinsic::x86_avx512_pternlog_q_128;
4313 else if (VecWidth == 256 && EltWidth == 64)
4314 IID = Intrinsic::x86_avx512_pternlog_q_256;
4315 else if (VecWidth == 512 && EltWidth == 64)
4316 IID = Intrinsic::x86_avx512_pternlog_q_512;
4322 Rep = Builder.CreateIntrinsic(IID, Args);
4326 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4327 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4328 bool ZeroMask = Name[11] ==
'z';
4329 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4332 if (VecWidth == 128 && !
High)
4333 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4334 else if (VecWidth == 256 && !
High)
4335 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4336 else if (VecWidth == 512 && !
High)
4337 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4338 else if (VecWidth == 128 &&
High)
4339 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4340 else if (VecWidth == 256 &&
High)
4341 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4342 else if (VecWidth == 512 &&
High)
4343 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4349 Rep = Builder.CreateIntrinsic(IID, Args);
4353 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4354 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4355 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4356 bool ZeroMask = Name[11] ==
'z';
4357 bool IndexForm = Name[17] ==
'i';
4359 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4360 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4361 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4362 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4363 bool ZeroMask = Name[11] ==
'z';
4364 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4367 if (VecWidth == 128 && !IsSaturating)
4368 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4369 else if (VecWidth == 256 && !IsSaturating)
4370 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4371 else if (VecWidth == 512 && !IsSaturating)
4372 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4373 else if (VecWidth == 128 && IsSaturating)
4374 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4375 else if (VecWidth == 256 && IsSaturating)
4376 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4377 else if (VecWidth == 512 && IsSaturating)
4378 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4388 if (Args[1]->
getType()->isVectorTy() &&
4391 ->isIntegerTy(32) &&
4392 Args[2]->
getType()->isVectorTy() &&
4395 ->isIntegerTy(32)) {
4396 Type *NewArgType =
nullptr;
4397 if (VecWidth == 128)
4399 else if (VecWidth == 256)
4401 else if (VecWidth == 512)
4406 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4407 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4410 Rep = Builder.CreateIntrinsic(IID, Args);
4414 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4415 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4416 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4417 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4418 bool ZeroMask = Name[11] ==
'z';
4419 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4422 if (VecWidth == 128 && !IsSaturating)
4423 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4424 else if (VecWidth == 256 && !IsSaturating)
4425 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4426 else if (VecWidth == 512 && !IsSaturating)
4427 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4428 else if (VecWidth == 128 && IsSaturating)
4429 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4430 else if (VecWidth == 256 && IsSaturating)
4431 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4432 else if (VecWidth == 512 && IsSaturating)
4433 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4443 if (Args[1]->
getType()->isVectorTy() &&
4446 ->isIntegerTy(32) &&
4447 Args[2]->
getType()->isVectorTy() &&
4450 ->isIntegerTy(32)) {
4451 Type *NewArgType =
nullptr;
4452 if (VecWidth == 128)
4454 else if (VecWidth == 256)
4456 else if (VecWidth == 512)
4461 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4462 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4465 Rep = Builder.CreateIntrinsic(IID, Args);
4469 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4470 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4471 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4473 if (Name[0] ==
'a' && Name.back() ==
'2')
4474 IID = Intrinsic::x86_addcarry_32;
4475 else if (Name[0] ==
'a' && Name.back() ==
'4')
4476 IID = Intrinsic::x86_addcarry_64;
4477 else if (Name[0] ==
's' && Name.back() ==
'2')
4478 IID = Intrinsic::x86_subborrow_32;
4479 else if (Name[0] ==
's' && Name.back() ==
'4')
4480 IID = Intrinsic::x86_subborrow_64;
4487 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4490 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4493 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4497 }
else if (Name.starts_with(
"avx512.mask.") &&
4507 if (Name.starts_with(
"neon.bfcvt")) {
4508 if (Name.starts_with(
"neon.bfcvtn2")) {
4510 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4512 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4513 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4516 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4517 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4519 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4523 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4524 return Builder.CreateShuffleVector(
4527 return Builder.CreateFPTrunc(CI->
getOperand(0),
4530 }
else if (Name.starts_with(
"sve.fcvt")) {
4533 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4534 .
Case(
"sve.fcvtnt.bf16f32",
4535 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4547 if (Args[1]->
getType() != BadPredTy)
4550 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4551 BadPredTy, Args[1]);
4552 Args[1] = Builder.CreateIntrinsic(
4553 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4555 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4564 if (Name ==
"mve.vctp64.old") {
4567 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4570 Value *C1 = Builder.CreateIntrinsic(
4571 Intrinsic::arm_mve_pred_v2i,
4573 return Builder.CreateIntrinsic(
4574 Intrinsic::arm_mve_pred_i2v,
4576 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4577 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4578 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4579 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4581 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4582 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4583 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4584 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4586 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4587 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4588 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4589 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4590 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4591 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4592 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4593 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4594 std::vector<Type *> Tys;
4598 case Intrinsic::arm_mve_mull_int_predicated:
4599 case Intrinsic::arm_mve_vqdmull_predicated:
4600 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4603 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4604 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4605 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4609 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4613 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4617 case Intrinsic::arm_cde_vcx1q_predicated:
4618 case Intrinsic::arm_cde_vcx1qa_predicated:
4619 case Intrinsic::arm_cde_vcx2q_predicated:
4620 case Intrinsic::arm_cde_vcx2qa_predicated:
4621 case Intrinsic::arm_cde_vcx3q_predicated:
4622 case Intrinsic::arm_cde_vcx3qa_predicated:
4629 std::vector<Value *>
Ops;
4631 Type *Ty =
Op->getType();
4632 if (Ty->getScalarSizeInBits() == 1) {
4633 Value *C1 = Builder.CreateIntrinsic(
4634 Intrinsic::arm_mve_pred_v2i,
4636 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4641 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4656 auto UpgradeLegacyWMMAIUIntrinsicCall =
4661 Args.push_back(Builder.getFalse());
4665 F->getParent(),
F->getIntrinsicID(), OverloadTys);
4672 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4677 NewCall->copyMetadata(*CI);
4681 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4682 assert(CI->
arg_size() == 7 &&
"Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4683 "intrinsic should have 7 arguments");
4686 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2});
4688 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4689 assert(CI->
arg_size() == 8 &&
"Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4690 "intrinsic should have 8 arguments");
4695 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2, T3, T4});
4715 if (NumOperands < 3)
4728 bool IsVolatile =
false;
4732 if (NumOperands > 3)
4737 if (NumOperands > 5) {
4739 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4753 if (VT->getElementType()->isIntegerTy(16)) {
4756 Val = Builder.CreateBitCast(Val, AsBF16);
4764 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4766 unsigned AddrSpace = PtrTy->getAddressSpace();
4769 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4771 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4776 MDNode *RangeNotPrivate =
4779 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4785 return Builder.CreateBitCast(RMW, RetTy);
4806 return MAV->getMetadata();
4813 return I->getDebugLoc().getAsMDNode();
4821 if (Name ==
"label") {
4824 }
else if (Name ==
"assign") {
4831 }
else if (Name ==
"declare") {
4836 }
else if (Name ==
"addr") {
4846 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr,
4848 }
else if (Name ==
"value") {
4851 unsigned ExprOp = 2;
4865 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
4873 int64_t OffsetVal =
Offset->getSExtValue();
4874 return Builder.CreateIntrinsic(OffsetVal >= 0
4875 ? Intrinsic::vector_splice_left
4876 : Intrinsic::vector_splice_right,
4878 {CI->getArgOperand(0), CI->getArgOperand(1),
4879 Builder.getInt32(std::abs(OffsetVal))});
4884 if (Name.starts_with(
"to.fp16")) {
4886 Builder.CreateFPTrunc(CI->
getArgOperand(0), Builder.getHalfTy());
4887 return Builder.CreateBitCast(Cast, CI->
getType());
4890 if (Name.starts_with(
"from.fp16")) {
4892 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
4893 return Builder.CreateFPExt(Cast, CI->
getType());
4918 if (!Name.consume_front(
"llvm."))
4921 bool IsX86 = Name.consume_front(
"x86.");
4922 bool IsNVVM = Name.consume_front(
"nvvm.");
4923 bool IsAArch64 = Name.consume_front(
"aarch64.");
4924 bool IsARM = Name.consume_front(
"arm.");
4925 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
4926 bool IsDbg = Name.consume_front(
"dbg.");
4928 (Name.consume_front(
"experimental.vector.splice") ||
4929 Name.consume_front(
"vector.splice")) &&
4930 !(Name.starts_with(
".left") || Name.starts_with(
".right"));
4931 Value *Rep =
nullptr;
4933 if (!IsX86 && Name ==
"stackprotectorcheck") {
4935 }
else if (IsNVVM) {
4939 }
else if (IsAArch64) {
4943 }
else if (IsAMDGCN) {
4947 }
else if (IsOldSplice) {
4949 }
else if (Name.consume_front(
"convert.")) {
4961 const auto &DefaultCase = [&]() ->
void {
4969 "Unknown function for CallBase upgrade and isn't just a name change");
4977 "Return type must have changed");
4978 assert(OldST->getNumElements() ==
4980 "Must have same number of elements");
4983 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4986 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4987 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4988 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5007 case Intrinsic::arm_neon_vst1:
5008 case Intrinsic::arm_neon_vst2:
5009 case Intrinsic::arm_neon_vst3:
5010 case Intrinsic::arm_neon_vst4:
5011 case Intrinsic::arm_neon_vst2lane:
5012 case Intrinsic::arm_neon_vst3lane:
5013 case Intrinsic::arm_neon_vst4lane: {
5015 NewCall = Builder.CreateCall(NewFn, Args);
5018 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5019 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5020 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5025 NewCall = Builder.CreateCall(NewFn, Args);
5028 case Intrinsic::aarch64_sve_ld3_sret:
5029 case Intrinsic::aarch64_sve_ld4_sret:
5030 case Intrinsic::aarch64_sve_ld2_sret: {
5032 Name = Name.substr(5);
5039 unsigned MinElts = RetTy->getMinNumElements() /
N;
5041 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5043 for (
unsigned I = 0;
I <
N;
I++) {
5044 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
5045 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
5051 case Intrinsic::coro_end: {
5054 NewCall = Builder.CreateCall(NewFn, Args);
5058 case Intrinsic::vector_extract: {
5060 Name = Name.substr(5);
5061 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
5066 unsigned MinElts = RetTy->getMinNumElements();
5069 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
5073 case Intrinsic::vector_insert: {
5075 Name = Name.substr(5);
5076 if (!Name.starts_with(
"aarch64.sve.tuple")) {
5080 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
5085 NewCall = Builder.CreateCall(
5089 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
5095 assert(
N > 1 &&
"Create is expected to be between 2-4");
5098 unsigned MinElts = RetTy->getMinNumElements() /
N;
5099 for (
unsigned I = 0;
I <
N;
I++) {
5101 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
5108 case Intrinsic::arm_neon_bfdot:
5109 case Intrinsic::arm_neon_bfmmla:
5110 case Intrinsic::arm_neon_bfmlalb:
5111 case Intrinsic::arm_neon_bfmlalt:
5112 case Intrinsic::aarch64_neon_bfdot:
5113 case Intrinsic::aarch64_neon_bfmmla:
5114 case Intrinsic::aarch64_neon_bfmlalb:
5115 case Intrinsic::aarch64_neon_bfmlalt: {
5118 "Mismatch between function args and call args");
5119 size_t OperandWidth =
5121 assert((OperandWidth == 64 || OperandWidth == 128) &&
5122 "Unexpected operand width");
5124 auto Iter = CI->
args().begin();
5125 Args.push_back(*Iter++);
5126 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5127 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5128 NewCall = Builder.CreateCall(NewFn, Args);
5132 case Intrinsic::bitreverse:
5133 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5136 case Intrinsic::ctlz:
5137 case Intrinsic::cttz: {
5144 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
5148 case Intrinsic::objectsize: {
5149 Value *NullIsUnknownSize =
5153 NewCall = Builder.CreateCall(
5158 case Intrinsic::ctpop:
5159 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5161 case Intrinsic::dbg_value: {
5163 Name = Name.substr(5);
5165 if (Name.starts_with(
"dbg.addr")) {
5179 if (
Offset->isZeroValue()) {
5180 NewCall = Builder.CreateCall(
5189 case Intrinsic::ptr_annotation:
5197 NewCall = Builder.CreateCall(
5206 case Intrinsic::var_annotation:
5213 NewCall = Builder.CreateCall(
5222 case Intrinsic::riscv_aes32dsi:
5223 case Intrinsic::riscv_aes32dsmi:
5224 case Intrinsic::riscv_aes32esi:
5225 case Intrinsic::riscv_aes32esmi:
5226 case Intrinsic::riscv_sm4ks:
5227 case Intrinsic::riscv_sm4ed: {
5237 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5238 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5244 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5245 Value *Res = NewCall;
5247 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5253 case Intrinsic::nvvm_mapa_shared_cluster: {
5257 Value *Res = NewCall;
5258 Res = Builder.CreateAddrSpaceCast(
5265 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5266 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5269 Args[0] = Builder.CreateAddrSpaceCast(
5272 NewCall = Builder.CreateCall(NewFn, Args);
5278 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5279 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5280 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5281 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5282 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5283 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5284 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5285 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5292 Args[0] = Builder.CreateAddrSpaceCast(
5301 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5303 NewCall = Builder.CreateCall(NewFn, Args);
5309 case Intrinsic::riscv_sha256sig0:
5310 case Intrinsic::riscv_sha256sig1:
5311 case Intrinsic::riscv_sha256sum0:
5312 case Intrinsic::riscv_sha256sum1:
5313 case Intrinsic::riscv_sm3p0:
5314 case Intrinsic::riscv_sm3p1: {
5321 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5323 NewCall = Builder.CreateCall(NewFn, Arg);
5325 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5332 case Intrinsic::x86_xop_vfrcz_ss:
5333 case Intrinsic::x86_xop_vfrcz_sd:
5334 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5337 case Intrinsic::x86_xop_vpermil2pd:
5338 case Intrinsic::x86_xop_vpermil2ps:
5339 case Intrinsic::x86_xop_vpermil2pd_256:
5340 case Intrinsic::x86_xop_vpermil2ps_256: {
5344 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5345 NewCall = Builder.CreateCall(NewFn, Args);
5349 case Intrinsic::x86_sse41_ptestc:
5350 case Intrinsic::x86_sse41_ptestz:
5351 case Intrinsic::x86_sse41_ptestnzc: {
5365 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5366 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5368 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5372 case Intrinsic::x86_rdtscp: {
5378 NewCall = Builder.CreateCall(NewFn);
5380 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5383 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5391 case Intrinsic::x86_sse41_insertps:
5392 case Intrinsic::x86_sse41_dppd:
5393 case Intrinsic::x86_sse41_dpps:
5394 case Intrinsic::x86_sse41_mpsadbw:
5395 case Intrinsic::x86_avx_dp_ps_256:
5396 case Intrinsic::x86_avx2_mpsadbw: {
5402 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5403 NewCall = Builder.CreateCall(NewFn, Args);
5407 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5408 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5409 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5410 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5411 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5412 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5418 NewCall = Builder.CreateCall(NewFn, Args);
5427 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5428 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5429 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5430 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5431 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5432 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5436 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5437 Args[1] = Builder.CreateBitCast(
5440 NewCall = Builder.CreateCall(NewFn, Args);
5441 Value *Res = Builder.CreateBitCast(
5449 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5450 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5451 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5455 Args[1] = Builder.CreateBitCast(
5457 Args[2] = Builder.CreateBitCast(
5460 NewCall = Builder.CreateCall(NewFn, Args);
5464 case Intrinsic::thread_pointer: {
5465 NewCall = Builder.CreateCall(NewFn, {});
5469 case Intrinsic::memcpy:
5470 case Intrinsic::memmove:
5471 case Intrinsic::memset: {
5487 NewCall = Builder.CreateCall(NewFn, Args);
5489 AttributeList NewAttrs = AttributeList::get(
5490 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5491 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5492 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5497 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5500 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5504 case Intrinsic::masked_load:
5505 case Intrinsic::masked_gather:
5506 case Intrinsic::masked_store:
5507 case Intrinsic::masked_scatter: {
5513 auto GetMaybeAlign = [](
Value *
Op) {
5523 auto GetAlign = [&](
Value *
Op) {
5532 case Intrinsic::masked_load:
5533 NewCall = Builder.CreateMaskedLoad(
5537 case Intrinsic::masked_gather:
5538 NewCall = Builder.CreateMaskedGather(
5544 case Intrinsic::masked_store:
5545 NewCall = Builder.CreateMaskedStore(
5549 case Intrinsic::masked_scatter:
5550 NewCall = Builder.CreateMaskedScatter(
5552 DL.getValueOrABITypeAlignment(
5566 case Intrinsic::lifetime_start:
5567 case Intrinsic::lifetime_end: {
5579 NewCall = Builder.CreateLifetimeStart(Ptr);
5581 NewCall = Builder.CreateLifetimeEnd(Ptr);
5590 case Intrinsic::x86_avx512_vpdpbusd_128:
5591 case Intrinsic::x86_avx512_vpdpbusd_256:
5592 case Intrinsic::x86_avx512_vpdpbusd_512:
5593 case Intrinsic::x86_avx512_vpdpbusds_128:
5594 case Intrinsic::x86_avx512_vpdpbusds_256:
5595 case Intrinsic::x86_avx512_vpdpbusds_512:
5596 case Intrinsic::x86_avx2_vpdpbssd_128:
5597 case Intrinsic::x86_avx2_vpdpbssd_256:
5598 case Intrinsic::x86_avx10_vpdpbssd_512:
5599 case Intrinsic::x86_avx2_vpdpbssds_128:
5600 case Intrinsic::x86_avx2_vpdpbssds_256:
5601 case Intrinsic::x86_avx10_vpdpbssds_512:
5602 case Intrinsic::x86_avx2_vpdpbsud_128:
5603 case Intrinsic::x86_avx2_vpdpbsud_256:
5604 case Intrinsic::x86_avx10_vpdpbsud_512:
5605 case Intrinsic::x86_avx2_vpdpbsuds_128:
5606 case Intrinsic::x86_avx2_vpdpbsuds_256:
5607 case Intrinsic::x86_avx10_vpdpbsuds_512:
5608 case Intrinsic::x86_avx2_vpdpbuud_128:
5609 case Intrinsic::x86_avx2_vpdpbuud_256:
5610 case Intrinsic::x86_avx10_vpdpbuud_512:
5611 case Intrinsic::x86_avx2_vpdpbuuds_128:
5612 case Intrinsic::x86_avx2_vpdpbuuds_256:
5613 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5618 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5619 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5621 NewCall = Builder.CreateCall(NewFn, Args);
5624 case Intrinsic::x86_avx512_vpdpwssd_128:
5625 case Intrinsic::x86_avx512_vpdpwssd_256:
5626 case Intrinsic::x86_avx512_vpdpwssd_512:
5627 case Intrinsic::x86_avx512_vpdpwssds_128:
5628 case Intrinsic::x86_avx512_vpdpwssds_256:
5629 case Intrinsic::x86_avx512_vpdpwssds_512:
5630 case Intrinsic::x86_avx2_vpdpwsud_128:
5631 case Intrinsic::x86_avx2_vpdpwsud_256:
5632 case Intrinsic::x86_avx10_vpdpwsud_512:
5633 case Intrinsic::x86_avx2_vpdpwsuds_128:
5634 case Intrinsic::x86_avx2_vpdpwsuds_256:
5635 case Intrinsic::x86_avx10_vpdpwsuds_512:
5636 case Intrinsic::x86_avx2_vpdpwusd_128:
5637 case Intrinsic::x86_avx2_vpdpwusd_256:
5638 case Intrinsic::x86_avx10_vpdpwusd_512:
5639 case Intrinsic::x86_avx2_vpdpwusds_128:
5640 case Intrinsic::x86_avx2_vpdpwusds_256:
5641 case Intrinsic::x86_avx10_vpdpwusds_512:
5642 case Intrinsic::x86_avx2_vpdpwuud_128:
5643 case Intrinsic::x86_avx2_vpdpwuud_256:
5644 case Intrinsic::x86_avx10_vpdpwuud_512:
5645 case Intrinsic::x86_avx2_vpdpwuuds_128:
5646 case Intrinsic::x86_avx2_vpdpwuuds_256:
5647 case Intrinsic::x86_avx10_vpdpwuuds_512:
5652 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5653 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5655 NewCall = Builder.CreateCall(NewFn, Args);
5658 assert(NewCall &&
"Should have either set this variable or returned through "
5659 "the default case");
5666 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5680 F->eraseFromParent();
5686 if (NumOperands == 0)
5694 if (NumOperands == 3) {
5698 Metadata *Elts2[] = {ScalarType, ScalarType,
5712 if (
Opc != Instruction::BitCast)
5716 Type *SrcTy = V->getType();
5733 if (
Opc != Instruction::BitCast)
5736 Type *SrcTy =
C->getType();
5763 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5764 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5765 if (Flag->getNumOperands() < 3)
5767 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5768 return K->getString() ==
"Debug Info Version";
5771 if (OpIt != ModFlags->op_end()) {
5772 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5779 bool BrokenDebugInfo =
false;
5782 if (!BrokenDebugInfo)
5788 M.getContext().diagnose(Diag);
5795 M.getContext().diagnose(DiagVersion);
5805 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5808 if (
F->hasFnAttribute(Attr)) {
5811 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5813 auto [Part, Rest] = S.
split(
',');
5819 const unsigned Dim = DimC -
'x';
5820 assert(Dim < 3 &&
"Unexpected dim char");
5830 F->addFnAttr(Attr, NewAttr);
5834 return S ==
"x" || S ==
"y" || S ==
"z";
5839 if (K ==
"kernel") {
5851 const unsigned Idx = (AlignIdxValuePair >> 16);
5852 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5857 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5862 if (K ==
"minctasm") {
5867 if (K ==
"maxnreg") {
5872 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
5876 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
5880 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
5884 if (K ==
"grid_constant") {
5899 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
5906 if (!SeenNodes.
insert(MD).second)
5913 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
5920 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5922 const MDOperand &V = MD->getOperand(j + 1);
5925 NewOperands.
append({K, V});
5928 if (NewOperands.
size() > 1)
5941 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
5942 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5943 if (ModRetainReleaseMarker) {
5949 ID->getString().split(ValueComp,
"#");
5950 if (ValueComp.
size() == 2) {
5951 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
5955 M.eraseNamedMetadata(ModRetainReleaseMarker);
5966 auto UpgradeToIntrinsic = [&](
const char *OldFunc,
5992 bool InvalidCast =
false;
5994 for (
unsigned I = 0, E = CI->
arg_size();
I != E; ++
I) {
6007 Arg = Builder.CreateBitCast(Arg, NewFuncTy->
getParamType(
I));
6009 Args.push_back(Arg);
6016 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6021 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->
getType());
6034 UpgradeToIntrinsic(
"clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6042 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6043 {
"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6044 {
"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6045 {
"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6046 {
"objc_autoreleaseReturnValue",
6047 llvm::Intrinsic::objc_autoreleaseReturnValue},
6048 {
"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6049 {
"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6050 {
"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6051 {
"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6052 {
"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6053 {
"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6054 {
"objc_release", llvm::Intrinsic::objc_release},
6055 {
"objc_retain", llvm::Intrinsic::objc_retain},
6056 {
"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6057 {
"objc_retainAutoreleaseReturnValue",
6058 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6059 {
"objc_retainAutoreleasedReturnValue",
6060 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6061 {
"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6062 {
"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6063 {
"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6064 {
"objc_unsafeClaimAutoreleasedReturnValue",
6065 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6066 {
"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6067 {
"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6068 {
"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6069 {
"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6070 {
"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6071 {
"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6072 {
"objc_arc_annotation_topdown_bbstart",
6073 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6074 {
"objc_arc_annotation_topdown_bbend",
6075 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6076 {
"objc_arc_annotation_bottomup_bbstart",
6077 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6078 {
"objc_arc_annotation_bottomup_bbend",
6079 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6081 for (
auto &
I : RuntimeFuncs)
6082 UpgradeToIntrinsic(
I.first,
I.second);
6086 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6090 bool HasObjCFlag =
false, HasClassProperties =
false,
Changed =
false;
6091 bool HasSwiftVersionFlag =
false;
6092 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6099 if (
Op->getNumOperands() != 3)
6113 if (
ID->getString() ==
"Objective-C Image Info Version")
6115 if (
ID->getString() ==
"Objective-C Class Properties")
6116 HasClassProperties =
true;
6118 if (
ID->getString() ==
"PIC Level") {
6119 if (
auto *Behavior =
6121 uint64_t V = Behavior->getLimitedValue();
6127 if (
ID->getString() ==
"PIE Level")
6128 if (
auto *Behavior =
6135 if (
ID->getString() ==
"branch-target-enforcement" ||
6136 ID->getString().starts_with(
"sign-return-address")) {
6137 if (
auto *Behavior =
6143 Op->getOperand(1),
Op->getOperand(2)};
6153 if (
ID->getString() ==
"Objective-C Image Info Section") {
6156 Value->getString().split(ValueComp,
" ");
6157 if (ValueComp.
size() != 1) {
6158 std::string NewValue;
6159 for (
auto &S : ValueComp)
6160 NewValue += S.str();
6171 if (
ID->getString() ==
"Objective-C Garbage Collection") {
6174 assert(Md->getValue() &&
"Expected non-empty metadata");
6175 auto Type = Md->getValue()->getType();
6178 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6179 if ((Val & 0xff) != Val) {
6180 HasSwiftVersionFlag =
true;
6181 SwiftABIVersion = (Val & 0xff00) >> 8;
6182 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6183 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6194 if (
ID->getString() ==
"amdgpu_code_object_version") {
6197 MDString::get(M.getContext(),
"amdhsa_code_object_version"),
6209 if (HasObjCFlag && !HasClassProperties) {
6215 if (HasSwiftVersionFlag) {
6219 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6221 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6229 auto TrimSpaces = [](
StringRef Section) -> std::string {
6231 Section.split(Components,
',');
6236 for (
auto Component : Components)
6237 OS <<
',' << Component.trim();
6242 for (
auto &GV : M.globals()) {
6243 if (!GV.hasSection())
6248 if (!Section.starts_with(
"__DATA, __objc_catlist"))
6253 GV.setSection(TrimSpaces(Section));
6269struct StrictFPUpgradeVisitor :
public InstVisitor<StrictFPUpgradeVisitor> {
6270 StrictFPUpgradeVisitor() =
default;
6273 if (!
Call.isStrictFP())
6279 Call.removeFnAttr(Attribute::StrictFP);
6280 Call.addFnAttr(Attribute::NoBuiltin);
6285struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6286 :
public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6287 AMDGPUUnsafeFPAtomicsUpgradeVisitor() =
default;
6289 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6304 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
6305 StrictFPUpgradeVisitor SFPV;
6310 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6311 F.getReturnType(),
F.getAttributes().getRetAttrs()));
6312 for (
auto &Arg :
F.args())
6314 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6318 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
6319 A.isValid() &&
A.isStringAttribute()) {
6320 F.setSection(
A.getValueAsString());
6321 F.removeFnAttr(
"implicit-section-name");
6328 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
6331 if (
A.getValueAsBool()) {
6332 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6338 F.removeFnAttr(
"amdgpu-unsafe-fp-atomics");
6346 if (!
F.hasFnAttribute(FnAttrName))
6347 F.addFnAttr(FnAttrName,
Value);
6354 if (!
F.hasFnAttribute(FnAttrName)) {
6356 F.addFnAttr(FnAttrName);
6358 auto A =
F.getFnAttribute(FnAttrName);
6359 if (
"false" ==
A.getValueAsString())
6360 F.removeFnAttr(FnAttrName);
6361 else if (
"true" ==
A.getValueAsString()) {
6362 F.removeFnAttr(FnAttrName);
6363 F.addFnAttr(FnAttrName);
6369 Triple T(M.getTargetTriple());
6370 if (!
T.isThumb() && !
T.isARM() && !
T.isAArch64())
6380 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6384 if (
Op->getNumOperands() != 3)
6393 uint64_t *ValPtr = IDStr ==
"branch-target-enforcement" ? &BTEValue
6394 : IDStr ==
"branch-protection-pauth-lr" ? &BPPLRValue
6395 : IDStr ==
"guarded-control-stack" ? &GCSValue
6396 : IDStr ==
"sign-return-address" ? &SRAValue
6397 : IDStr ==
"sign-return-address-all" ? &SRAALLValue
6398 : IDStr ==
"sign-return-address-with-bkey"
6404 *ValPtr = CI->getZExtValue();
6410 bool BTE = BTEValue == 1;
6411 bool BPPLR = BPPLRValue == 1;
6412 bool GCS = GCSValue == 1;
6413 bool SRA = SRAValue == 1;
6416 if (SRA && SRAALLValue == 1)
6417 SignTypeValue =
"all";
6420 if (SRA && SRABKeyValue == 1)
6421 SignKeyValue =
"b_key";
6423 for (
Function &
F : M.getFunctionList()) {
6424 if (
F.isDeclaration())
6431 if (
auto A =
F.getFnAttribute(
"sign-return-address");
6432 A.isValid() &&
"none" ==
A.getValueAsString()) {
6433 F.removeFnAttr(
"sign-return-address");
6434 F.removeFnAttr(
"sign-return-address-key");
6450 if (SRAALLValue == 1)
6452 if (SRABKeyValue == 1)
6461 if (
T->getNumOperands() < 1)
6466 return S->getString().starts_with(
"llvm.vectorizer.");
6470 StringRef OldPrefix =
"llvm.vectorizer.";
6473 if (OldTag ==
"llvm.vectorizer.unroll")
6485 if (
T->getNumOperands() < 1)
6490 if (!OldTag->getString().starts_with(
"llvm.vectorizer."))
6495 Ops.reserve(
T->getNumOperands());
6497 for (
unsigned I = 1,
E =
T->getNumOperands();
I !=
E; ++
I)
6498 Ops.push_back(
T->getOperand(
I));
6512 Ops.reserve(
T->getNumOperands());
6523 if ((
T.isSPIR() || (
T.isSPIRV() && !
T.isSPIRVLogical())) &&
6524 !
DL.contains(
"-G") && !
DL.starts_with(
"G")) {
6525 return DL.empty() ? std::string(
"G1") : (
DL +
"-G1").str();
6528 if (
T.isLoongArch64() ||
T.isRISCV64()) {
6530 auto I =
DL.find(
"-n64-");
6532 return (
DL.take_front(
I) +
"-n32:64-" +
DL.drop_front(
I + 5)).str();
6537 std::string Res =
DL.str();
6540 if (!
DL.contains(
"-G") && !
DL.starts_with(
"G"))
6541 Res.append(Res.empty() ?
"G1" :
"-G1");
6549 if (!
DL.contains(
"-ni") && !
DL.starts_with(
"ni"))
6550 Res.append(
"-ni:7:8:9");
6552 if (
DL.ends_with(
"ni:7"))
6554 if (
DL.ends_with(
"ni:7:8"))
6559 if (!
DL.contains(
"-p7") && !
DL.starts_with(
"p7"))
6560 Res.append(
"-p7:160:256:256:32");
6561 if (!
DL.contains(
"-p8") && !
DL.starts_with(
"p8"))
6562 Res.append(
"-p8:128:128:128:48");
6563 constexpr StringRef OldP8(
"-p8:128:128-");
6564 if (
DL.contains(OldP8))
6565 Res.replace(Res.find(OldP8), OldP8.
size(),
"-p8:128:128:128:48-");
6566 if (!
DL.contains(
"-p9") && !
DL.starts_with(
"p9"))
6567 Res.append(
"-p9:192:256:256:32");
6571 if (!
DL.contains(
"m:e"))
6572 Res = Res.empty() ?
"m:e" :
"m:e-" + Res;
6577 if (
T.isSystemZ() && !
DL.empty()) {
6579 if (!
DL.contains(
"-S64"))
6580 return "E-S64" +
DL.drop_front(1).str();
6584 auto AddPtr32Ptr64AddrSpaces = [&
DL, &Res]() {
6587 StringRef AddrSpaces{
"-p270:32:32-p271:32:32-p272:64:64"};
6588 if (!
DL.contains(AddrSpaces)) {
6590 Regex R(
"^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6591 if (R.match(Res, &
Groups))
6597 if (
T.isAArch64()) {
6599 if (!
DL.empty() && !
DL.contains(
"-Fn32"))
6600 Res.append(
"-Fn32");
6601 AddPtr32Ptr64AddrSpaces();
6605 if (
T.isSPARC() || (
T.isMIPS64() && !
DL.contains(
"m:m")) ||
T.isPPC64() ||
6609 std::string I64 =
"-i64:64";
6610 std::string I128 =
"-i128:128";
6612 size_t Pos = Res.find(I64);
6613 if (Pos !=
size_t(-1))
6614 Res.insert(Pos + I64.size(), I128);
6618 if (
T.isPPC() &&
T.isOSAIX() && !
DL.contains(
"f64:32:64") && !
DL.empty()) {
6619 size_t Pos = Res.find(
"-S128");
6622 Res.insert(Pos,
"-f64:32:64");
6628 AddPtr32Ptr64AddrSpaces();
6636 if (!
T.isOSIAMCU()) {
6637 std::string I128 =
"-i128:128";
6640 Regex R(
"^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6641 if (R.match(Res, &
Groups))
6649 if (
T.isWindowsMSVCEnvironment() && !
T.isArch64Bit()) {
6651 auto I =
Ref.find(
"-f80:32-");
6653 Res = (
Ref.take_front(
I) +
"-f80:128-" +
Ref.drop_front(
I + 8)).str();
6661 Attribute A =
B.getAttribute(
"no-frame-pointer-elim");
6664 FramePointer =
A.getValueAsString() ==
"true" ?
"all" :
"none";
6665 B.removeAttribute(
"no-frame-pointer-elim");
6667 if (
B.contains(
"no-frame-pointer-elim-non-leaf")) {
6669 if (FramePointer !=
"all")
6670 FramePointer =
"non-leaf";
6671 B.removeAttribute(
"no-frame-pointer-elim-non-leaf");
6673 if (!FramePointer.
empty())
6674 B.addAttribute(
"frame-pointer", FramePointer);
6676 A =
B.getAttribute(
"null-pointer-is-valid");
6679 bool NullPointerIsValid =
A.getValueAsString() ==
"true";
6680 B.removeAttribute(
"null-pointer-is-valid");
6681 if (NullPointerIsValid)
6682 B.addAttribute(Attribute::NullPointerIsValid);
6692 return OBD.
getTag() ==
"clang.arc.attachedcall" &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are string literals.
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not belong to a module.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ...
@ ADDRESS_SPACE_SHARED_CLUSTER
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
@ Default
The result values are uniform if and only if all operands are uniform.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.