30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/IntrinsicsARM.h"
32#include "llvm/IR/IntrinsicsNVPTX.h"
33#include "llvm/IR/IntrinsicsRISCV.h"
34#include "llvm/IR/IntrinsicsWebAssembly.h"
35#include "llvm/IR/IntrinsicsX86.h"
53 cl::desc(
"Disable autoupgrade of debug info"));
63 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
78 Type *LastArgType =
F->getFunctionType()->getParamType(
79 F->getFunctionType()->getNumParams() - 1);
94 if (
F->getReturnType()->isVectorTy())
104 if (
F->getReturnType()->getScalarType()->isBFloatTy())
114 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
128 if (
Name.consume_front(
"avx."))
129 return (
Name.starts_with(
"blend.p") ||
130 Name ==
"cvt.ps2.pd.256" ||
131 Name ==
"cvtdq2.pd.256" ||
132 Name ==
"cvtdq2.ps.256" ||
133 Name.starts_with(
"movnt.") ||
134 Name.starts_with(
"sqrt.p") ||
135 Name.starts_with(
"storeu.") ||
136 Name.starts_with(
"vbroadcast.s") ||
137 Name.starts_with(
"vbroadcastf128") ||
138 Name.starts_with(
"vextractf128.") ||
139 Name.starts_with(
"vinsertf128.") ||
140 Name.starts_with(
"vperm2f128.") ||
141 Name.starts_with(
"vpermil."));
143 if (
Name.consume_front(
"avx2."))
144 return (
Name ==
"movntdqa" ||
145 Name.starts_with(
"pabs.") ||
146 Name.starts_with(
"padds.") ||
147 Name.starts_with(
"paddus.") ||
148 Name.starts_with(
"pblendd.") ||
150 Name.starts_with(
"pbroadcast") ||
151 Name.starts_with(
"pcmpeq.") ||
152 Name.starts_with(
"pcmpgt.") ||
153 Name.starts_with(
"pmax") ||
154 Name.starts_with(
"pmin") ||
155 Name.starts_with(
"pmovsx") ||
156 Name.starts_with(
"pmovzx") ||
158 Name ==
"pmulu.dq" ||
159 Name.starts_with(
"psll.dq") ||
160 Name.starts_with(
"psrl.dq") ||
161 Name.starts_with(
"psubs.") ||
162 Name.starts_with(
"psubus.") ||
163 Name.starts_with(
"vbroadcast") ||
164 Name ==
"vbroadcasti128" ||
165 Name ==
"vextracti128" ||
166 Name ==
"vinserti128" ||
167 Name ==
"vperm2i128");
169 if (
Name.consume_front(
"avx512.")) {
170 if (
Name.consume_front(
"mask."))
172 return (
Name.starts_with(
"add.p") ||
173 Name.starts_with(
"and.") ||
174 Name.starts_with(
"andn.") ||
175 Name.starts_with(
"broadcast.s") ||
176 Name.starts_with(
"broadcastf32x4.") ||
177 Name.starts_with(
"broadcastf32x8.") ||
178 Name.starts_with(
"broadcastf64x2.") ||
179 Name.starts_with(
"broadcastf64x4.") ||
180 Name.starts_with(
"broadcasti32x4.") ||
181 Name.starts_with(
"broadcasti32x8.") ||
182 Name.starts_with(
"broadcasti64x2.") ||
183 Name.starts_with(
"broadcasti64x4.") ||
184 Name.starts_with(
"cmp.b") ||
185 Name.starts_with(
"cmp.d") ||
186 Name.starts_with(
"cmp.q") ||
187 Name.starts_with(
"cmp.w") ||
188 Name.starts_with(
"compress.b") ||
189 Name.starts_with(
"compress.d") ||
190 Name.starts_with(
"compress.p") ||
191 Name.starts_with(
"compress.q") ||
192 Name.starts_with(
"compress.store.") ||
193 Name.starts_with(
"compress.w") ||
194 Name.starts_with(
"conflict.") ||
195 Name.starts_with(
"cvtdq2pd.") ||
196 Name.starts_with(
"cvtdq2ps.") ||
197 Name ==
"cvtpd2dq.256" ||
198 Name ==
"cvtpd2ps.256" ||
199 Name ==
"cvtps2pd.128" ||
200 Name ==
"cvtps2pd.256" ||
201 Name.starts_with(
"cvtqq2pd.") ||
202 Name ==
"cvtqq2ps.256" ||
203 Name ==
"cvtqq2ps.512" ||
204 Name ==
"cvttpd2dq.256" ||
205 Name ==
"cvttps2dq.128" ||
206 Name ==
"cvttps2dq.256" ||
207 Name.starts_with(
"cvtudq2pd.") ||
208 Name.starts_with(
"cvtudq2ps.") ||
209 Name.starts_with(
"cvtuqq2pd.") ||
210 Name ==
"cvtuqq2ps.256" ||
211 Name ==
"cvtuqq2ps.512" ||
212 Name.starts_with(
"dbpsadbw.") ||
213 Name.starts_with(
"div.p") ||
214 Name.starts_with(
"expand.b") ||
215 Name.starts_with(
"expand.d") ||
216 Name.starts_with(
"expand.load.") ||
217 Name.starts_with(
"expand.p") ||
218 Name.starts_with(
"expand.q") ||
219 Name.starts_with(
"expand.w") ||
220 Name.starts_with(
"fpclass.p") ||
221 Name.starts_with(
"insert") ||
222 Name.starts_with(
"load.") ||
223 Name.starts_with(
"loadu.") ||
224 Name.starts_with(
"lzcnt.") ||
225 Name.starts_with(
"max.p") ||
226 Name.starts_with(
"min.p") ||
227 Name.starts_with(
"movddup") ||
228 Name.starts_with(
"move.s") ||
229 Name.starts_with(
"movshdup") ||
230 Name.starts_with(
"movsldup") ||
231 Name.starts_with(
"mul.p") ||
232 Name.starts_with(
"or.") ||
233 Name.starts_with(
"pabs.") ||
234 Name.starts_with(
"packssdw.") ||
235 Name.starts_with(
"packsswb.") ||
236 Name.starts_with(
"packusdw.") ||
237 Name.starts_with(
"packuswb.") ||
238 Name.starts_with(
"padd.") ||
239 Name.starts_with(
"padds.") ||
240 Name.starts_with(
"paddus.") ||
241 Name.starts_with(
"palignr.") ||
242 Name.starts_with(
"pand.") ||
243 Name.starts_with(
"pandn.") ||
244 Name.starts_with(
"pavg") ||
245 Name.starts_with(
"pbroadcast") ||
246 Name.starts_with(
"pcmpeq.") ||
247 Name.starts_with(
"pcmpgt.") ||
248 Name.starts_with(
"perm.df.") ||
249 Name.starts_with(
"perm.di.") ||
250 Name.starts_with(
"permvar.") ||
251 Name.starts_with(
"pmaddubs.w.") ||
252 Name.starts_with(
"pmaddw.d.") ||
253 Name.starts_with(
"pmax") ||
254 Name.starts_with(
"pmin") ||
255 Name ==
"pmov.qd.256" ||
256 Name ==
"pmov.qd.512" ||
257 Name ==
"pmov.wb.256" ||
258 Name ==
"pmov.wb.512" ||
259 Name.starts_with(
"pmovsx") ||
260 Name.starts_with(
"pmovzx") ||
261 Name.starts_with(
"pmul.dq.") ||
262 Name.starts_with(
"pmul.hr.sw.") ||
263 Name.starts_with(
"pmulh.w.") ||
264 Name.starts_with(
"pmulhu.w.") ||
265 Name.starts_with(
"pmull.") ||
266 Name.starts_with(
"pmultishift.qb.") ||
267 Name.starts_with(
"pmulu.dq.") ||
268 Name.starts_with(
"por.") ||
269 Name.starts_with(
"prol.") ||
270 Name.starts_with(
"prolv.") ||
271 Name.starts_with(
"pror.") ||
272 Name.starts_with(
"prorv.") ||
273 Name.starts_with(
"pshuf.b.") ||
274 Name.starts_with(
"pshuf.d.") ||
275 Name.starts_with(
"pshufh.w.") ||
276 Name.starts_with(
"pshufl.w.") ||
277 Name.starts_with(
"psll.d") ||
278 Name.starts_with(
"psll.q") ||
279 Name.starts_with(
"psll.w") ||
280 Name.starts_with(
"pslli") ||
281 Name.starts_with(
"psllv") ||
282 Name.starts_with(
"psra.d") ||
283 Name.starts_with(
"psra.q") ||
284 Name.starts_with(
"psra.w") ||
285 Name.starts_with(
"psrai") ||
286 Name.starts_with(
"psrav") ||
287 Name.starts_with(
"psrl.d") ||
288 Name.starts_with(
"psrl.q") ||
289 Name.starts_with(
"psrl.w") ||
290 Name.starts_with(
"psrli") ||
291 Name.starts_with(
"psrlv") ||
292 Name.starts_with(
"psub.") ||
293 Name.starts_with(
"psubs.") ||
294 Name.starts_with(
"psubus.") ||
295 Name.starts_with(
"pternlog.") ||
296 Name.starts_with(
"punpckh") ||
297 Name.starts_with(
"punpckl") ||
298 Name.starts_with(
"pxor.") ||
299 Name.starts_with(
"shuf.f") ||
300 Name.starts_with(
"shuf.i") ||
301 Name.starts_with(
"shuf.p") ||
302 Name.starts_with(
"sqrt.p") ||
303 Name.starts_with(
"store.b.") ||
304 Name.starts_with(
"store.d.") ||
305 Name.starts_with(
"store.p") ||
306 Name.starts_with(
"store.q.") ||
307 Name.starts_with(
"store.w.") ||
308 Name ==
"store.ss" ||
309 Name.starts_with(
"storeu.") ||
310 Name.starts_with(
"sub.p") ||
311 Name.starts_with(
"ucmp.") ||
312 Name.starts_with(
"unpckh.") ||
313 Name.starts_with(
"unpckl.") ||
314 Name.starts_with(
"valign.") ||
315 Name ==
"vcvtph2ps.128" ||
316 Name ==
"vcvtph2ps.256" ||
317 Name.starts_with(
"vextract") ||
318 Name.starts_with(
"vfmadd.") ||
319 Name.starts_with(
"vfmaddsub.") ||
320 Name.starts_with(
"vfnmadd.") ||
321 Name.starts_with(
"vfnmsub.") ||
322 Name.starts_with(
"vpdpbusd.") ||
323 Name.starts_with(
"vpdpbusds.") ||
324 Name.starts_with(
"vpdpwssd.") ||
325 Name.starts_with(
"vpdpwssds.") ||
326 Name.starts_with(
"vpermi2var.") ||
327 Name.starts_with(
"vpermil.p") ||
328 Name.starts_with(
"vpermilvar.") ||
329 Name.starts_with(
"vpermt2var.") ||
330 Name.starts_with(
"vpmadd52") ||
331 Name.starts_with(
"vpshld.") ||
332 Name.starts_with(
"vpshldv.") ||
333 Name.starts_with(
"vpshrd.") ||
334 Name.starts_with(
"vpshrdv.") ||
335 Name.starts_with(
"vpshufbitqmb.") ||
336 Name.starts_with(
"xor."));
338 if (
Name.consume_front(
"mask3."))
340 return (
Name.starts_with(
"vfmadd.") ||
341 Name.starts_with(
"vfmaddsub.") ||
342 Name.starts_with(
"vfmsub.") ||
343 Name.starts_with(
"vfmsubadd.") ||
344 Name.starts_with(
"vfnmsub."));
346 if (
Name.consume_front(
"maskz."))
348 return (
Name.starts_with(
"pternlog.") ||
349 Name.starts_with(
"vfmadd.") ||
350 Name.starts_with(
"vfmaddsub.") ||
351 Name.starts_with(
"vpdpbusd.") ||
352 Name.starts_with(
"vpdpbusds.") ||
353 Name.starts_with(
"vpdpwssd.") ||
354 Name.starts_with(
"vpdpwssds.") ||
355 Name.starts_with(
"vpermt2var.") ||
356 Name.starts_with(
"vpmadd52") ||
357 Name.starts_with(
"vpshldv.") ||
358 Name.starts_with(
"vpshrdv."));
361 return (
Name ==
"movntdqa" ||
362 Name ==
"pmul.dq.512" ||
363 Name ==
"pmulu.dq.512" ||
364 Name.starts_with(
"broadcastm") ||
365 Name.starts_with(
"cmp.p") ||
366 Name.starts_with(
"cvtb2mask.") ||
367 Name.starts_with(
"cvtd2mask.") ||
368 Name.starts_with(
"cvtmask2") ||
369 Name.starts_with(
"cvtq2mask.") ||
370 Name ==
"cvtusi2sd" ||
371 Name.starts_with(
"cvtw2mask.") ||
376 Name ==
"kortestc.w" ||
377 Name ==
"kortestz.w" ||
378 Name.starts_with(
"kunpck") ||
381 Name.starts_with(
"padds.") ||
382 Name.starts_with(
"pbroadcast") ||
383 Name.starts_with(
"prol") ||
384 Name.starts_with(
"pror") ||
385 Name.starts_with(
"psll.dq") ||
386 Name.starts_with(
"psrl.dq") ||
387 Name.starts_with(
"psubs.") ||
388 Name.starts_with(
"ptestm") ||
389 Name.starts_with(
"ptestnm") ||
390 Name.starts_with(
"storent.") ||
391 Name.starts_with(
"vbroadcast.s") ||
392 Name.starts_with(
"vpshld.") ||
393 Name.starts_with(
"vpshrd."));
396 if (
Name.consume_front(
"fma."))
397 return (
Name.starts_with(
"vfmadd.") ||
398 Name.starts_with(
"vfmsub.") ||
399 Name.starts_with(
"vfmsubadd.") ||
400 Name.starts_with(
"vfnmadd.") ||
401 Name.starts_with(
"vfnmsub."));
403 if (
Name.consume_front(
"fma4."))
404 return Name.starts_with(
"vfmadd.s");
406 if (
Name.consume_front(
"sse."))
407 return (
Name ==
"add.ss" ||
408 Name ==
"cvtsi2ss" ||
409 Name ==
"cvtsi642ss" ||
412 Name.starts_with(
"sqrt.p") ||
414 Name.starts_with(
"storeu.") ||
417 if (
Name.consume_front(
"sse2."))
418 return (
Name ==
"add.sd" ||
419 Name ==
"cvtdq2pd" ||
420 Name ==
"cvtdq2ps" ||
421 Name ==
"cvtps2pd" ||
422 Name ==
"cvtsi2sd" ||
423 Name ==
"cvtsi642sd" ||
424 Name ==
"cvtss2sd" ||
427 Name.starts_with(
"padds.") ||
428 Name.starts_with(
"paddus.") ||
429 Name.starts_with(
"pcmpeq.") ||
430 Name.starts_with(
"pcmpgt.") ||
435 Name ==
"pmulu.dq" ||
436 Name.starts_with(
"pshuf") ||
437 Name.starts_with(
"psll.dq") ||
438 Name.starts_with(
"psrl.dq") ||
439 Name.starts_with(
"psubs.") ||
440 Name.starts_with(
"psubus.") ||
441 Name.starts_with(
"sqrt.p") ||
443 Name ==
"storel.dq" ||
444 Name.starts_with(
"storeu.") ||
447 if (
Name.consume_front(
"sse41."))
448 return (
Name.starts_with(
"blendp") ||
449 Name ==
"movntdqa" ||
459 Name.starts_with(
"pmovsx") ||
460 Name.starts_with(
"pmovzx") ||
463 if (
Name.consume_front(
"sse42."))
464 return Name ==
"crc32.64.8";
466 if (
Name.consume_front(
"sse4a."))
467 return Name.starts_with(
"movnt.");
469 if (
Name.consume_front(
"ssse3."))
470 return (
Name ==
"pabs.b.128" ||
471 Name ==
"pabs.d.128" ||
472 Name ==
"pabs.w.128");
474 if (
Name.consume_front(
"xop."))
475 return (
Name ==
"vpcmov" ||
476 Name ==
"vpcmov.256" ||
477 Name.starts_with(
"vpcom") ||
478 Name.starts_with(
"vprot"));
480 return (
Name ==
"addcarry.u32" ||
481 Name ==
"addcarry.u64" ||
482 Name ==
"addcarryx.u32" ||
483 Name ==
"addcarryx.u64" ||
484 Name ==
"subborrow.u32" ||
485 Name ==
"subborrow.u64" ||
486 Name.starts_with(
"vcvtph2ps."));
492 if (!
Name.consume_front(
"x86."))
500 if (
Name ==
"rdtscp") {
502 if (
F->getFunctionType()->getNumParams() == 0)
507 Intrinsic::x86_rdtscp);
514 if (
Name.consume_front(
"sse41.ptest")) {
516 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
517 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
518 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
531 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
532 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
533 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
534 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
535 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
536 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
541 if (
Name.consume_front(
"avx512.mask.cmp.")) {
544 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
545 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
546 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
547 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
548 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
549 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
556 if (
Name.consume_front(
"avx512bf16.")) {
559 .
Case(
"cvtne2ps2bf16.128",
560 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
561 .
Case(
"cvtne2ps2bf16.256",
562 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
563 .
Case(
"cvtne2ps2bf16.512",
564 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
565 .
Case(
"mask.cvtneps2bf16.128",
566 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
567 .
Case(
"cvtneps2bf16.256",
568 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
569 .
Case(
"cvtneps2bf16.512",
570 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
577 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
578 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
579 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
586 if (
Name.consume_front(
"xop.")) {
588 if (
Name.starts_with(
"vpermil2")) {
591 auto Idx =
F->getFunctionType()->getParamType(2);
592 if (
Idx->isFPOrFPVectorTy()) {
593 unsigned IdxSize =
Idx->getPrimitiveSizeInBits();
594 unsigned EltSize =
Idx->getScalarSizeInBits();
595 if (EltSize == 64 && IdxSize == 128)
596 ID = Intrinsic::x86_xop_vpermil2pd;
597 else if (EltSize == 32 && IdxSize == 128)
598 ID = Intrinsic::x86_xop_vpermil2ps;
599 else if (EltSize == 64 && IdxSize == 256)
600 ID = Intrinsic::x86_xop_vpermil2pd_256;
602 ID = Intrinsic::x86_xop_vpermil2ps_256;
604 }
else if (
F->arg_size() == 2)
607 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
608 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
619 if (
Name ==
"seh.recoverfp") {
621 Intrinsic::eh_recoverfp);
633 if (
Name.starts_with(
"rbit")) {
636 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
640 if (
Name ==
"thread.pointer") {
643 Intrinsic::thread_pointer);
647 bool Neon =
Name.consume_front(
"neon.");
652 if (
Name.consume_front(
"bfdot.")) {
656 .
Cases(
"v2f32.v8i8",
"v4f32.v16i8",
661 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
662 assert((OperandWidth == 64 || OperandWidth == 128) &&
663 "Unexpected operand width");
665 std::array<Type *, 2> Tys{
676 if (
Name.consume_front(
"bfm")) {
678 if (
Name.consume_back(
".v4f32.v16i8")) {
718 F->arg_begin()->getType());
722 if (
Name.consume_front(
"vst")) {
724 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
728 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
729 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
732 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
733 Intrinsic::arm_neon_vst4lane};
735 auto fArgs =
F->getFunctionType()->params();
736 Type *Tys[] = {fArgs[0], fArgs[1]};
739 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
742 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
751 if (
Name.consume_front(
"mve.")) {
753 if (
Name ==
"vctp64") {
754 if (cast<FixedVectorType>(
F->getReturnType())->getNumElements() == 4) {
764 if (
Name.consume_back(
".v4i1")) {
766 if (
Name.consume_back(
".predicated.v2i64.v4i32"))
768 return Name ==
"mull.int" ||
Name ==
"vqdmull";
770 if (
Name.consume_back(
".v2i64")) {
772 bool IsGather =
Name.consume_front(
"vldr.gather.");
773 if (IsGather ||
Name.consume_front(
"vstr.scatter.")) {
774 if (
Name.consume_front(
"base.")) {
776 Name.consume_front(
"wb.");
779 return Name ==
"predicated.v2i64";
782 if (
Name.consume_front(
"offset.predicated."))
783 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
784 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
797 if (
Name.consume_front(
"cde.vcx")) {
799 if (
Name.consume_back(
".predicated.v2i64.v4i1"))
816 F->arg_begin()->getType());
820 if (
Name.starts_with(
"addp")) {
822 if (
F->arg_size() != 2)
824 VectorType *Ty = dyn_cast<VectorType>(
F->getReturnType());
825 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
827 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
833 if (
Name.consume_front(
"sve.")) {
835 if (
Name.consume_front(
"bf")) {
836 if (
Name.consume_back(
".lane")) {
840 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
841 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
842 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
854 if (
Name ==
"fcvt.bf16f32" ||
Name ==
"fcvtnt.bf16f32") {
859 if (
Name.consume_front(
"addqv")) {
861 if (!
F->getReturnType()->isFPOrFPVectorTy())
864 auto Args =
F->getFunctionType()->params();
865 Type *Tys[] = {
F->getReturnType(), Args[1]};
867 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
871 if (
Name.consume_front(
"ld")) {
873 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
876 cast<VectorType>(
F->getReturnType())->getElementType();
878 cast<VectorType>(
F->arg_begin()->getType())->getElementCount();
879 Type *Ty = VectorType::get(ScalarTy, EC);
881 Intrinsic::aarch64_sve_ld2_sret,
882 Intrinsic::aarch64_sve_ld3_sret,
883 Intrinsic::aarch64_sve_ld4_sret,
886 LoadIDs[
Name[0] -
'2'], Ty);
892 if (
Name.consume_front(
"tuple.")) {
894 if (
Name.starts_with(
"get")) {
896 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
898 F->getParent(), Intrinsic::vector_extract, Tys);
902 if (
Name.starts_with(
"set")) {
904 auto Args =
F->getFunctionType()->params();
905 Type *Tys[] = {Args[0], Args[2], Args[1]};
907 F->getParent(), Intrinsic::vector_insert, Tys);
911 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
914 auto Args =
F->getFunctionType()->params();
915 Type *Tys[] = {
F->getReturnType(), Args[1]};
917 F->getParent(), Intrinsic::vector_insert, Tys);
929 if (
Name.consume_front(
"abs."))
931 .
Case(
"bf16", Intrinsic::nvvm_abs_bf16)
932 .
Case(
"bf16x2", Intrinsic::nvvm_abs_bf16x2)
935 if (
Name.consume_front(
"fma.rn."))
937 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
938 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
939 .
Case(
"ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
940 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
941 .
Case(
"ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
942 .
Case(
"ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
943 .
Case(
"ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
944 .
Case(
"ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
945 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
946 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
947 .
Case(
"sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
948 .
Case(
"sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
951 if (
Name.consume_front(
"fmax."))
953 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
954 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
955 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
956 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
957 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
958 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
959 .
Case(
"ftz.nan.xorsign.abs.bf16",
960 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
961 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
962 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
963 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
964 .
Case(
"ftz.xorsign.abs.bf16x2",
965 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
966 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
967 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
968 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
969 .
Case(
"nan.xorsign.abs.bf16x2",
970 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
971 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
972 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
975 if (
Name.consume_front(
"fmin."))
977 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
978 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
979 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
980 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
981 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
982 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
983 .
Case(
"ftz.nan.xorsign.abs.bf16",
984 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
985 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
986 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
987 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
988 .
Case(
"ftz.xorsign.abs.bf16x2",
989 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
990 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
991 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
992 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
993 .
Case(
"nan.xorsign.abs.bf16x2",
994 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
995 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
996 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
999 if (
Name.consume_front(
"neg."))
1001 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1002 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1009 bool CanUpgradeDebugIntrinsicsToRecords) {
1010 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1015 if (!
Name.consume_front(
"llvm.") ||
Name.empty())
1021 bool IsArm =
Name.consume_front(
"arm.");
1022 if (IsArm ||
Name.consume_front(
"aarch64.")) {
1028 if (
Name.consume_front(
"amdgcn.")) {
1029 if (
Name ==
"alignbit") {
1032 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1036 if (
Name.consume_front(
"atomic.")) {
1037 if (
Name.starts_with(
"inc") ||
Name.starts_with(
"dec")) {
1046 if (
Name.consume_front(
"ds.") ||
Name.consume_front(
"global.atomic.") ||
1047 Name.consume_front(
"flat.atomic.")) {
1048 if (
Name.starts_with(
"fadd") ||
1050 (
Name.starts_with(
"fmin") && !
Name.starts_with(
"fmin.num")) ||
1051 (
Name.starts_with(
"fmax") && !
Name.starts_with(
"fmax.num"))) {
1059 if (
Name.starts_with(
"ldexp.")) {
1062 F->getParent(), Intrinsic::ldexp,
1063 {F->getReturnType(), F->getArg(1)->getType()});
1072 if (
F->arg_size() == 1) {
1080 F->arg_begin()->getType());
1085 if (
F->arg_size() == 2 &&
Name ==
"coro.end") {
1088 Intrinsic::coro_end);
1095 if (
Name.consume_front(
"dbg.")) {
1097 if (CanUpgradeDebugIntrinsicsToRecords &&
1098 F->getParent()->IsNewDbgInfoFormat) {
1099 if (
Name ==
"addr" ||
Name ==
"value" ||
Name ==
"assign" ||
1100 Name ==
"declare" ||
Name ==
"label") {
1109 if (
Name ==
"addr" || (
Name ==
"value" &&
F->arg_size() == 4)) {
1112 Intrinsic::dbg_value);
1119 if (
Name.consume_front(
"experimental.vector.")) {
1125 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1126 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1127 .
StartsWith(
"splice.", Intrinsic::vector_splice)
1128 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1129 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1130 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1133 const auto *FT =
F->getFunctionType();
1135 if (
ID == Intrinsic::vector_extract ||
1136 ID == Intrinsic::vector_interleave2)
1138 Tys.push_back(FT->getReturnType());
1139 if (
ID != Intrinsic::vector_interleave2)
1140 Tys.push_back(FT->getParamType(0));
1141 if (
ID == Intrinsic::vector_insert)
1143 Tys.push_back(FT->getParamType(1));
1149 if (
Name.consume_front(
"reduce.")) {
1151 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1154 .
Case(
"add", Intrinsic::vector_reduce_add)
1155 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1156 .
Case(
"and", Intrinsic::vector_reduce_and)
1157 .
Case(
"or", Intrinsic::vector_reduce_or)
1158 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1159 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1160 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1161 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1162 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1163 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1164 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1169 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1174 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1175 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1180 auto Args =
F->getFunctionType()->params();
1182 {Args[V2 ? 1 : 0]});
1189 if (
Name.consume_front(
"experimental.stepvector.")) {
1193 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1198 if (
Name.starts_with(
"flt.rounds")) {
1201 Intrinsic::get_rounding);
1206 if (
Name.starts_with(
"invariant.group.barrier")) {
1208 auto Args =
F->getFunctionType()->params();
1209 Type* ObjectPtr[1] = {Args[0]};
1212 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1221 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1222 .StartsWith(
"memmove.", Intrinsic::memmove)
1224 if (
F->arg_size() == 5) {
1228 F->getFunctionType()->params().slice(0, 3);
1234 if (
Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1237 const auto *FT =
F->getFunctionType();
1238 Type *ParamTypes[2] = {
1239 FT->getParamType(0),
1243 Intrinsic::memset, ParamTypes);
1249 if (
Name.consume_front(
"nvvm.")) {
1251 if (
F->arg_size() == 1) {
1254 .
Cases(
"brev32",
"brev64", Intrinsic::bitreverse)
1255 .
Case(
"clz.i", Intrinsic::ctlz)
1256 .
Case(
"popc.i", Intrinsic::ctpop)
1260 {F->getReturnType()});
1266 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1278 bool Expand =
false;
1279 if (
Name.consume_front(
"abs."))
1281 Expand =
Name ==
"i" ||
Name ==
"ll";
1282 else if (
Name ==
"clz.ll" ||
Name ==
"popc.ll" ||
Name ==
"h2f")
1284 else if (
Name.consume_front(
"max.") ||
Name.consume_front(
"min."))
1288 else if (
Name.consume_front(
"atomic.load.add."))
1290 Expand =
Name.starts_with(
"f32.p") ||
Name.starts_with(
"f64.p");
1291 else if (
Name.consume_front(
"bitcast."))
1295 else if (
Name.consume_front(
"rotate."))
1297 Expand =
Name ==
"b32" ||
Name ==
"b64" ||
Name ==
"right.b64";
1298 else if (
Name.consume_front(
"ptr.gen.to."))
1300 Expand =
Name.starts_with(
"local") ||
Name.starts_with(
"shared") ||
1301 Name.starts_with(
"global") ||
Name.starts_with(
"constant");
1302 else if (
Name.consume_front(
"ptr."))
1305 (
Name.consume_front(
"local") ||
Name.consume_front(
"shared") ||
1306 Name.consume_front(
"global") ||
Name.consume_front(
"constant")) &&
1307 Name.starts_with(
".to.gen");
1308 else if (
Name.consume_front(
"ldg.global."))
1310 Expand = (
Name.starts_with(
"i.") ||
Name.starts_with(
"f.") ||
1311 Name.starts_with(
"p."));
1326 if (
Name.starts_with(
"objectsize.")) {
1327 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1328 if (
F->arg_size() == 2 ||
F->arg_size() == 3 ||
1333 Intrinsic::objectsize, Tys);
1340 if (
Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1343 F->getParent(), Intrinsic::ptr_annotation,
1344 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1350 if (
Name.consume_front(
"riscv.")) {
1353 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1354 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1355 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1356 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1359 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1372 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1373 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1382 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1383 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1384 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1385 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1390 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1402 if (
Name ==
"stackprotectorcheck") {
1409 if (
Name ==
"var.annotation" &&
F->arg_size() == 4) {
1412 F->getParent(), Intrinsic::var_annotation,
1413 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1420 if (
Name.consume_front(
"wasm.")) {
1423 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1424 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1425 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1430 F->getReturnType());
1434 if (
Name.consume_front(
"dot.i8x16.i7x16.")) {
1436 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1438 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1456 auto *
ST = dyn_cast<StructType>(
F->getReturnType());
1457 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1466 auto *FT =
F->getFunctionType();
1468 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1469 std::string
Name =
F->getName().str();
1472 Name,
F->getParent());
1483 if (Result != std::nullopt) {
1496 bool CanUpgradeDebugIntrinsicsToRecords) {
1500 assert(
F != NewFn &&
"Intrinsic function upgraded to the same function");
1512 GV->
getName() ==
"llvm.global_dtors")) ||
1527 unsigned N =
Init->getNumOperands();
1528 std::vector<Constant *> NewCtors(
N);
1529 for (
unsigned i = 0; i !=
N; ++i) {
1530 auto Ctor = cast<Constant>(
Init->getOperand(i));
1532 Ctor->getAggregateElement(1),
1545 auto *ResultTy = cast<FixedVectorType>(
Op->getType());
1546 unsigned NumElts = ResultTy->getNumElements() * 8;
1560 for (
unsigned l = 0; l != NumElts; l += 16)
1561 for (
unsigned i = 0; i != 16; ++i) {
1562 unsigned Idx = NumElts + i - Shift;
1564 Idx -= NumElts - 16;
1565 Idxs[l + i] =
Idx + l;
1579 auto *ResultTy = cast<FixedVectorType>(
Op->getType());
1580 unsigned NumElts = ResultTy->getNumElements() * 8;
1594 for (
unsigned l = 0; l != NumElts; l += 16)
1595 for (
unsigned i = 0; i != 16; ++i) {
1596 unsigned Idx = i + Shift;
1598 Idx += NumElts - 16;
1599 Idxs[l + i] =
Idx + l;
1620 for (
unsigned i = 0; i != NumElts; ++i)
1632 if (
const auto *
C = dyn_cast<Constant>(Mask))
1633 if (
C->isAllOnesValue())
1637 cast<FixedVectorType>(Op0->
getType())->getNumElements());
1644 if (
const auto *
C = dyn_cast<Constant>(Mask))
1645 if (
C->isAllOnesValue())
1649 Mask->getType()->getIntegerBitWidth());
1662 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1664 unsigned NumElts = cast<FixedVectorType>(Op0->
getType())->getNumElements();
1665 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
1666 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
1671 ShiftVal &= (NumElts - 1);
1680 if (ShiftVal > 16) {
1688 for (
unsigned l = 0; l < NumElts; l += 16) {
1689 for (
unsigned i = 0; i != 16; ++i) {
1690 unsigned Idx = ShiftVal + i;
1691 if (!IsVALIGN &&
Idx >= 16)
1692 Idx += NumElts - 16;
1693 Indices[l + i] =
Idx + l;
1698 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
1704 bool ZeroMask,
bool IndexForm) {
1710 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1711 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1712 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1713 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1714 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1715 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1716 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1717 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1718 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1719 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1720 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1721 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1722 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1723 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1724 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1725 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1726 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1727 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1728 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1729 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1730 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1731 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1732 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1733 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1734 else if (VecWidth == 128 && EltWidth == 16)
1735 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1736 else if (VecWidth == 256 && EltWidth == 16)
1737 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1738 else if (VecWidth == 512 && EltWidth == 16)
1739 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1740 else if (VecWidth == 128 && EltWidth == 8)
1741 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1742 else if (VecWidth == 256 && EltWidth == 8)
1743 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1744 else if (VecWidth == 512 && EltWidth == 8)
1745 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1779 bool IsRotateRight) {
1788 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1793 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1813 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1816 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1819 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1822 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1825 Pred = ICmpInst::ICMP_EQ;
1828 Pred = ICmpInst::ICMP_NE;
1844 bool IsShiftRight,
bool ZeroMask) {
1857 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1862 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1881 const Align Alignment =
1883 ?
Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1887 if (
const auto *
C = dyn_cast<Constant>(Mask))
1888 if (
C->isAllOnesValue())
1892 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1902 const Align Alignment =
1910 if (
const auto *
C = dyn_cast<Constant>(Mask))
1911 if (
C->isAllOnesValue())
1915 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1924 {Op0, Builder.
getInt1(
false)});
1939 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1946 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1962 unsigned NumElts = cast<FixedVectorType>(Vec->
getType())->getNumElements();
1964 const auto *
C = dyn_cast<Constant>(Mask);
1965 if (!
C || !
C->isAllOnesValue())
1971 for (
unsigned i = 0; i != NumElts; ++i)
1973 for (
unsigned i = NumElts; i != 8; ++i)
1974 Indices[i] = NumElts + i % NumElts;
1985 unsigned NumElts = cast<FixedVectorType>(Op0->
getType())->getNumElements();
1991 }
else if (
CC == 7) {
1998 case 0: Pred = ICmpInst::ICMP_EQ;
break;
1999 case 1: Pred =
Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
break;
2000 case 2: Pred =
Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
break;
2001 case 4: Pred = ICmpInst::ICMP_NE;
break;
2002 case 5: Pred =
Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
break;
2003 case 6: Pred =
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
break;
2038 unsigned NumElts = cast<FixedVectorType>(CI.
getType())->getNumElements();
2040 return Builder.
CreateSExt(Mask, ReturnOp,
"vpmovm2");
2051 if (
Name.starts_with(
"max.p")) {
2052 if (VecWidth == 128 && EltWidth == 32)
2053 IID = Intrinsic::x86_sse_max_ps;
2054 else if (VecWidth == 128 && EltWidth == 64)
2055 IID = Intrinsic::x86_sse2_max_pd;
2056 else if (VecWidth == 256 && EltWidth == 32)
2057 IID = Intrinsic::x86_avx_max_ps_256;
2058 else if (VecWidth == 256 && EltWidth == 64)
2059 IID = Intrinsic::x86_avx_max_pd_256;
2062 }
else if (
Name.starts_with(
"min.p")) {
2063 if (VecWidth == 128 && EltWidth == 32)
2064 IID = Intrinsic::x86_sse_min_ps;
2065 else if (VecWidth == 128 && EltWidth == 64)
2066 IID = Intrinsic::x86_sse2_min_pd;
2067 else if (VecWidth == 256 && EltWidth == 32)
2068 IID = Intrinsic::x86_avx_min_ps_256;
2069 else if (VecWidth == 256 && EltWidth == 64)
2070 IID = Intrinsic::x86_avx_min_pd_256;
2073 }
else if (
Name.starts_with(
"pshuf.b.")) {
2074 if (VecWidth == 128)
2075 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2076 else if (VecWidth == 256)
2077 IID = Intrinsic::x86_avx2_pshuf_b;
2078 else if (VecWidth == 512)
2079 IID = Intrinsic::x86_avx512_pshuf_b_512;
2082 }
else if (
Name.starts_with(
"pmul.hr.sw.")) {
2083 if (VecWidth == 128)
2084 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2085 else if (VecWidth == 256)
2086 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2087 else if (VecWidth == 512)
2088 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2091 }
else if (
Name.starts_with(
"pmulh.w.")) {
2092 if (VecWidth == 128)
2093 IID = Intrinsic::x86_sse2_pmulh_w;
2094 else if (VecWidth == 256)
2095 IID = Intrinsic::x86_avx2_pmulh_w;
2096 else if (VecWidth == 512)
2097 IID = Intrinsic::x86_avx512_pmulh_w_512;
2100 }
else if (
Name.starts_with(
"pmulhu.w.")) {
2101 if (VecWidth == 128)
2102 IID = Intrinsic::x86_sse2_pmulhu_w;
2103 else if (VecWidth == 256)
2104 IID = Intrinsic::x86_avx2_pmulhu_w;
2105 else if (VecWidth == 512)
2106 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2109 }
else if (
Name.starts_with(
"pmaddw.d.")) {
2110 if (VecWidth == 128)
2111 IID = Intrinsic::x86_sse2_pmadd_wd;
2112 else if (VecWidth == 256)
2113 IID = Intrinsic::x86_avx2_pmadd_wd;
2114 else if (VecWidth == 512)
2115 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2118 }
else if (
Name.starts_with(
"pmaddubs.w.")) {
2119 if (VecWidth == 128)
2120 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2121 else if (VecWidth == 256)
2122 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2123 else if (VecWidth == 512)
2124 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2127 }
else if (
Name.starts_with(
"packsswb.")) {
2128 if (VecWidth == 128)
2129 IID = Intrinsic::x86_sse2_packsswb_128;
2130 else if (VecWidth == 256)
2131 IID = Intrinsic::x86_avx2_packsswb;
2132 else if (VecWidth == 512)
2133 IID = Intrinsic::x86_avx512_packsswb_512;
2136 }
else if (
Name.starts_with(
"packssdw.")) {
2137 if (VecWidth == 128)
2138 IID = Intrinsic::x86_sse2_packssdw_128;
2139 else if (VecWidth == 256)
2140 IID = Intrinsic::x86_avx2_packssdw;
2141 else if (VecWidth == 512)
2142 IID = Intrinsic::x86_avx512_packssdw_512;
2145 }
else if (
Name.starts_with(
"packuswb.")) {
2146 if (VecWidth == 128)
2147 IID = Intrinsic::x86_sse2_packuswb_128;
2148 else if (VecWidth == 256)
2149 IID = Intrinsic::x86_avx2_packuswb;
2150 else if (VecWidth == 512)
2151 IID = Intrinsic::x86_avx512_packuswb_512;
2154 }
else if (
Name.starts_with(
"packusdw.")) {
2155 if (VecWidth == 128)
2156 IID = Intrinsic::x86_sse41_packusdw;
2157 else if (VecWidth == 256)
2158 IID = Intrinsic::x86_avx2_packusdw;
2159 else if (VecWidth == 512)
2160 IID = Intrinsic::x86_avx512_packusdw_512;
2163 }
else if (
Name.starts_with(
"vpermilvar.")) {
2164 if (VecWidth == 128 && EltWidth == 32)
2165 IID = Intrinsic::x86_avx_vpermilvar_ps;
2166 else if (VecWidth == 128 && EltWidth == 64)
2167 IID = Intrinsic::x86_avx_vpermilvar_pd;
2168 else if (VecWidth == 256 && EltWidth == 32)
2169 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2170 else if (VecWidth == 256 && EltWidth == 64)
2171 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2172 else if (VecWidth == 512 && EltWidth == 32)
2173 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2174 else if (VecWidth == 512 && EltWidth == 64)
2175 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2178 }
else if (
Name ==
"cvtpd2dq.256") {
2179 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2180 }
else if (
Name ==
"cvtpd2ps.256") {
2181 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2182 }
else if (
Name ==
"cvttpd2dq.256") {
2183 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2184 }
else if (
Name ==
"cvttps2dq.128") {
2185 IID = Intrinsic::x86_sse2_cvttps2dq;
2186 }
else if (
Name ==
"cvttps2dq.256") {
2187 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2188 }
else if (
Name.starts_with(
"permvar.")) {
2190 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2191 IID = Intrinsic::x86_avx2_permps;
2192 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2193 IID = Intrinsic::x86_avx2_permd;
2194 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2195 IID = Intrinsic::x86_avx512_permvar_df_256;
2196 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2197 IID = Intrinsic::x86_avx512_permvar_di_256;
2198 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2199 IID = Intrinsic::x86_avx512_permvar_sf_512;
2200 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2201 IID = Intrinsic::x86_avx512_permvar_si_512;
2202 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2203 IID = Intrinsic::x86_avx512_permvar_df_512;
2204 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2205 IID = Intrinsic::x86_avx512_permvar_di_512;
2206 else if (VecWidth == 128 && EltWidth == 16)
2207 IID = Intrinsic::x86_avx512_permvar_hi_128;
2208 else if (VecWidth == 256 && EltWidth == 16)
2209 IID = Intrinsic::x86_avx512_permvar_hi_256;
2210 else if (VecWidth == 512 && EltWidth == 16)
2211 IID = Intrinsic::x86_avx512_permvar_hi_512;
2212 else if (VecWidth == 128 && EltWidth == 8)
2213 IID = Intrinsic::x86_avx512_permvar_qi_128;
2214 else if (VecWidth == 256 && EltWidth == 8)
2215 IID = Intrinsic::x86_avx512_permvar_qi_256;
2216 else if (VecWidth == 512 && EltWidth == 8)
2217 IID = Intrinsic::x86_avx512_permvar_qi_512;
2220 }
else if (
Name.starts_with(
"dbpsadbw.")) {
2221 if (VecWidth == 128)
2222 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2223 else if (VecWidth == 256)
2224 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2225 else if (VecWidth == 512)
2226 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2229 }
else if (
Name.starts_with(
"pmultishift.qb.")) {
2230 if (VecWidth == 128)
2231 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2232 else if (VecWidth == 256)
2233 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2234 else if (VecWidth == 512)
2235 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2238 }
else if (
Name.starts_with(
"conflict.")) {
2239 if (
Name[9] ==
'd' && VecWidth == 128)
2240 IID = Intrinsic::x86_avx512_conflict_d_128;
2241 else if (
Name[9] ==
'd' && VecWidth == 256)
2242 IID = Intrinsic::x86_avx512_conflict_d_256;
2243 else if (
Name[9] ==
'd' && VecWidth == 512)
2244 IID = Intrinsic::x86_avx512_conflict_d_512;
2245 else if (
Name[9] ==
'q' && VecWidth == 128)
2246 IID = Intrinsic::x86_avx512_conflict_q_128;
2247 else if (
Name[9] ==
'q' && VecWidth == 256)
2248 IID = Intrinsic::x86_avx512_conflict_q_256;
2249 else if (
Name[9] ==
'q' && VecWidth == 512)
2250 IID = Intrinsic::x86_avx512_conflict_q_512;
2253 }
else if (
Name.starts_with(
"pavg.")) {
2254 if (
Name[5] ==
'b' && VecWidth == 128)
2255 IID = Intrinsic::x86_sse2_pavg_b;
2256 else if (
Name[5] ==
'b' && VecWidth == 256)
2257 IID = Intrinsic::x86_avx2_pavg_b;
2258 else if (
Name[5] ==
'b' && VecWidth == 512)
2259 IID = Intrinsic::x86_avx512_pavg_b_512;
2260 else if (
Name[5] ==
'w' && VecWidth == 128)
2261 IID = Intrinsic::x86_sse2_pavg_w;
2262 else if (
Name[5] ==
'w' && VecWidth == 256)
2263 IID = Intrinsic::x86_avx2_pavg_w;
2264 else if (
Name[5] ==
'w' && VecWidth == 512)
2265 IID = Intrinsic::x86_avx512_pavg_w_512;
2285 if (AsmStr->find(
"mov\tfp") == 0 &&
2286 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2287 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2288 AsmStr->replace(Pos, 1,
";");
2294 Value *Rep =
nullptr;
2296 if (
Name ==
"abs.i" ||
Name ==
"abs.ll") {
2302 }
else if (
Name.starts_with(
"atomic.load.add.f32.p") ||
2303 Name.starts_with(
"atomic.load.add.f64.p")) {
2307 AtomicOrdering::SequentiallyConsistent);
2308 }
else if (
Name.consume_front(
"max.") &&
2317 }
else if (
Name.consume_front(
"min.") &&
2326 }
else if (
Name ==
"clz.ll") {
2333 }
else if (
Name ==
"popc.ll") {
2338 Arg,
nullptr,
"ctpop");
2340 }
else if (
Name ==
"h2f") {
2344 }
else if (
Name.consume_front(
"bitcast.") &&
2348 }
else if (
Name ==
"rotate.b32") {
2352 {Arg, Arg, ShiftAmt});
2353 }
else if (
Name ==
"rotate.b64") {
2358 {Arg, Arg, ZExtShiftAmt});
2359 }
else if (
Name ==
"rotate.right.b64") {
2364 {Arg, Arg, ZExtShiftAmt});
2365 }
else if ((
Name.consume_front(
"ptr.gen.to.") &&
2366 (
Name.starts_with(
"local") ||
Name.starts_with(
"shared") ||
2367 Name.starts_with(
"global") ||
Name.starts_with(
"constant"))) ||
2368 (
Name.consume_front(
"ptr.") &&
2369 (
Name.consume_front(
"local") ||
Name.consume_front(
"shared") ||
2370 Name.consume_front(
"global") ||
2371 Name.consume_front(
"constant")) &&
2372 Name.starts_with(
".to.gen"))) {
2374 }
else if (
Name.consume_front(
"ldg.global")) {
2381 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2386 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2400 if (
F->getReturnType()->isIntegerTy())
2411 Value *Rep =
nullptr;
2413 if (
Name.starts_with(
"sse4a.movnt.")) {
2424 Type *SrcEltTy = cast<VectorType>(Arg1->
getType())->getElementType();
2425 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2431 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2432 }
else if (
Name.starts_with(
"avx.movnt.") ||
2433 Name.starts_with(
"avx512.storent.")) {
2444 Arg0, PointerType::getUnqual(Arg1->
getType()),
"cast");
2448 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2449 }
else if (
Name ==
"sse2.storel.dq") {
2457 Arg0, PointerType::getUnqual(Elt->
getType()),
"cast");
2459 }
else if (
Name.starts_with(
"sse.storeu.") ||
2460 Name.starts_with(
"sse2.storeu.") ||
2461 Name.starts_with(
"avx.storeu.")) {
2468 }
else if (
Name ==
"avx512.mask.store.ss") {
2472 }
else if (
Name.starts_with(
"avx512.mask.store")) {
2477 }
else if (
Name.starts_with(
"sse2.pcmp") ||
Name.starts_with(
"avx2.pcmp")) {
2480 bool CmpEq =
Name[9] ==
'e';
2481 Rep = Builder.
CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2484 }
else if (
Name.starts_with(
"avx512.broadcastm")) {
2492 }
else if (
Name ==
"sse.sqrt.ss" ||
Name ==
"sse2.sqrt.sd") {
2497 }
else if (
Name.starts_with(
"avx.sqrt.p") ||
2498 Name.starts_with(
"sse2.sqrt.p") ||
2499 Name.starts_with(
"sse.sqrt.p")) {
2501 {CI->getArgOperand(0)});
2502 }
else if (
Name.starts_with(
"avx512.mask.sqrt.p")) {
2505 cast<ConstantInt>(CI->
getArgOperand(3))->getZExtValue() != 4)) {
2507 : Intrinsic::x86_avx512_sqrt_pd_512;
2513 {CI->getArgOperand(0)});
2517 }
else if (
Name.starts_with(
"avx512.ptestm") ||
2518 Name.starts_with(
"avx512.ptestnm")) {
2527 : ICmpInst::ICMP_EQ;
2530 }
else if (
Name.starts_with(
"avx512.mask.pbroadcast")) {
2536 }
else if (
Name.starts_with(
"avx512.kunpck")) {
2541 for (
unsigned i = 0; i != NumElts; ++i)
2552 }
else if (
Name ==
"avx512.kand.w") {
2557 }
else if (
Name ==
"avx512.kandn.w") {
2563 }
else if (
Name ==
"avx512.kor.w") {
2568 }
else if (
Name ==
"avx512.kxor.w") {
2573 }
else if (
Name ==
"avx512.kxnor.w") {
2579 }
else if (
Name ==
"avx512.knot.w") {
2583 }
else if (
Name ==
"avx512.kortestz.w" ||
Name ==
"avx512.kortestc.w") {
2589 if (
Name[14] ==
'c')
2590 C = ConstantInt::getAllOnesValue(Builder.
getInt16Ty());
2592 C = ConstantInt::getNullValue(Builder.
getInt16Ty());
2595 }
else if (
Name ==
"sse.add.ss" ||
Name ==
"sse2.add.sd" ||
2596 Name ==
"sse.sub.ss" ||
Name ==
"sse2.sub.sd" ||
2597 Name ==
"sse.mul.ss" ||
Name ==
"sse2.mul.sd" ||
2598 Name ==
"sse.div.ss" ||
Name ==
"sse2.div.sd") {
2601 ConstantInt::get(I32Ty, 0));
2603 ConstantInt::get(I32Ty, 0));
2605 if (
Name.contains(
".add."))
2607 else if (
Name.contains(
".sub."))
2609 else if (
Name.contains(
".mul."))
2614 ConstantInt::get(I32Ty, 0));
2615 }
else if (
Name.starts_with(
"avx512.mask.pcmp")) {
2617 bool CmpEq =
Name[16] ==
'e';
2619 }
else if (
Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
2627 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2630 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2633 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2640 }
else if (
Name.starts_with(
"avx512.mask.fpclass.p")) {
2645 if (VecWidth == 128 && EltWidth == 32)
2646 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2647 else if (VecWidth == 256 && EltWidth == 32)
2648 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2649 else if (VecWidth == 512 && EltWidth == 32)
2650 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2651 else if (VecWidth == 128 && EltWidth == 64)
2652 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2653 else if (VecWidth == 256 && EltWidth == 64)
2654 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2655 else if (VecWidth == 512 && EltWidth == 64)
2656 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2663 }
else if (
Name.starts_with(
"avx512.cmp.p")) {
2665 Type *OpTy = Args[0]->getType();
2669 if (VecWidth == 128 && EltWidth == 32)
2670 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2671 else if (VecWidth == 256 && EltWidth == 32)
2672 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2673 else if (VecWidth == 512 && EltWidth == 32)
2674 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2675 else if (VecWidth == 128 && EltWidth == 64)
2676 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2677 else if (VecWidth == 256 && EltWidth == 64)
2678 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2679 else if (VecWidth == 512 && EltWidth == 64)
2680 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2685 if (VecWidth == 512)
2687 Args.push_back(Mask);
2690 }
else if (
Name.starts_with(
"avx512.mask.cmp.")) {
2692 unsigned Imm = cast<ConstantInt>(CI->
getArgOperand(2))->getZExtValue();
2694 }
else if (
Name.starts_with(
"avx512.mask.ucmp.")) {
2695 unsigned Imm = cast<ConstantInt>(CI->
getArgOperand(2))->getZExtValue();
2697 }
else if (
Name.starts_with(
"avx512.cvtb2mask.") ||
2698 Name.starts_with(
"avx512.cvtw2mask.") ||
2699 Name.starts_with(
"avx512.cvtd2mask.") ||
2700 Name.starts_with(
"avx512.cvtq2mask.")) {
2703 Rep = Builder.
CreateICmp(ICmpInst::ICMP_SLT,
Op, Zero);
2705 }
else if (
Name ==
"ssse3.pabs.b.128" ||
Name ==
"ssse3.pabs.w.128" ||
2706 Name ==
"ssse3.pabs.d.128" ||
Name.starts_with(
"avx2.pabs") ||
2707 Name.starts_with(
"avx512.mask.pabs")) {
2709 }
else if (
Name ==
"sse41.pmaxsb" ||
Name ==
"sse2.pmaxs.w" ||
2710 Name ==
"sse41.pmaxsd" ||
Name.starts_with(
"avx2.pmaxs") ||
2711 Name.starts_with(
"avx512.mask.pmaxs")) {
2713 }
else if (
Name ==
"sse2.pmaxu.b" ||
Name ==
"sse41.pmaxuw" ||
2714 Name ==
"sse41.pmaxud" ||
Name.starts_with(
"avx2.pmaxu") ||
2715 Name.starts_with(
"avx512.mask.pmaxu")) {
2717 }
else if (
Name ==
"sse41.pminsb" ||
Name ==
"sse2.pmins.w" ||
2718 Name ==
"sse41.pminsd" ||
Name.starts_with(
"avx2.pmins") ||
2719 Name.starts_with(
"avx512.mask.pmins")) {
2721 }
else if (
Name ==
"sse2.pminu.b" ||
Name ==
"sse41.pminuw" ||
2722 Name ==
"sse41.pminud" ||
Name.starts_with(
"avx2.pminu") ||
2723 Name.starts_with(
"avx512.mask.pminu")) {
2725 }
else if (
Name ==
"sse2.pmulu.dq" ||
Name ==
"avx2.pmulu.dq" ||
2726 Name ==
"avx512.pmulu.dq.512" ||
2727 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
2729 }
else if (
Name ==
"sse41.pmuldq" ||
Name ==
"avx2.pmul.dq" ||
2730 Name ==
"avx512.pmul.dq.512" ||
2731 Name.starts_with(
"avx512.mask.pmul.dq.")) {
2733 }
else if (
Name ==
"sse.cvtsi2ss" ||
Name ==
"sse2.cvtsi2sd" ||
2734 Name ==
"sse.cvtsi642ss" ||
Name ==
"sse2.cvtsi642sd") {
2737 cast<VectorType>(CI->
getType())->getElementType());
2739 }
else if (
Name ==
"avx512.cvtusi2sd") {
2742 cast<VectorType>(CI->
getType())->getElementType());
2744 }
else if (
Name ==
"sse2.cvtss2sd") {
2747 Rep, cast<VectorType>(CI->
getType())->getElementType());
2749 }
else if (
Name ==
"sse2.cvtdq2pd" ||
Name ==
"sse2.cvtdq2ps" ||
2750 Name ==
"avx.cvtdq2.pd.256" ||
Name ==
"avx.cvtdq2.ps.256" ||
2751 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
2752 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
2753 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
2754 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
2755 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
2756 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
2757 Name ==
"avx512.mask.cvtqq2ps.256" ||
2758 Name ==
"avx512.mask.cvtqq2ps.512" ||
2759 Name ==
"avx512.mask.cvtuqq2ps.256" ||
2760 Name ==
"avx512.mask.cvtuqq2ps.512" ||
Name ==
"sse2.cvtps2pd" ||
2761 Name ==
"avx.cvt.ps2.pd.256" ||
2762 Name ==
"avx512.mask.cvtps2pd.128" ||
2763 Name ==
"avx512.mask.cvtps2pd.256") {
2764 auto *DstTy = cast<FixedVectorType>(CI->
getType());
2766 auto *SrcTy = cast<FixedVectorType>(Rep->
getType());
2768 unsigned NumDstElts = DstTy->getNumElements();
2770 assert(NumDstElts == 2 &&
"Unexpected vector size");
2774 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2775 bool IsUnsigned =
Name.contains(
"cvtu");
2777 Rep = Builder.
CreateFPExt(Rep, DstTy,
"cvtps2pd");
2780 cast<ConstantInt>(CI->
getArgOperand(3))->getZExtValue() != 4)) {
2781 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2782 : Intrinsic::x86_avx512_sitofp_round;
2786 Rep = IsUnsigned ? Builder.
CreateUIToFP(Rep, DstTy,
"cvt")
2793 }
else if (
Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
2794 Name.starts_with(
"vcvtph2ps.")) {
2795 auto *DstTy = cast<FixedVectorType>(CI->
getType());
2797 auto *SrcTy = cast<FixedVectorType>(Rep->
getType());
2798 unsigned NumDstElts = DstTy->getNumElements();
2799 if (NumDstElts != SrcTy->getNumElements()) {
2800 assert(NumDstElts == 4 &&
"Unexpected vector size");
2805 Rep = Builder.
CreateFPExt(Rep, DstTy,
"cvtph2ps");
2809 }
else if (
Name.starts_with(
"avx512.mask.load")) {
2814 }
else if (
Name.starts_with(
"avx512.mask.expand.load.")) {
2815 auto *ResultTy = cast<FixedVectorType>(CI->
getType());
2816 Type *PtrTy = ResultTy->getElementType();
2823 ResultTy->getNumElements());
2827 }
else if (
Name.starts_with(
"avx512.mask.compress.store.")) {
2829 Type *PtrTy = ResultTy->getElementType();
2837 cast<FixedVectorType>(ResultTy)->getNumElements());
2839 Rep = Builder.
CreateIntrinsic(Intrinsic::masked_compressstore, ResultTy,
2841 }
else if (
Name.starts_with(
"avx512.mask.compress.") ||
2842 Name.starts_with(
"avx512.mask.expand.")) {
2843 auto *ResultTy = cast<FixedVectorType>(CI->
getType());
2846 ResultTy->getNumElements());
2848 bool IsCompress =
Name[12] ==
'c';
2849 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2850 : Intrinsic::x86_avx512_mask_expand;
2853 }
else if (
Name.starts_with(
"xop.vpcom")) {
2855 if (
Name.ends_with(
"ub") ||
Name.ends_with(
"uw") ||
Name.ends_with(
"ud") ||
2856 Name.ends_with(
"uq"))
2858 else if (
Name.ends_with(
"b") ||
Name.ends_with(
"w") ||
2859 Name.ends_with(
"d") ||
Name.ends_with(
"q"))
2866 Imm = cast<ConstantInt>(CI->
getArgOperand(2))->getZExtValue();
2869 if (
Name.starts_with(
"lt"))
2871 else if (
Name.starts_with(
"le"))
2873 else if (
Name.starts_with(
"gt"))
2875 else if (
Name.starts_with(
"ge"))
2877 else if (
Name.starts_with(
"eq"))
2879 else if (
Name.starts_with(
"ne"))
2881 else if (
Name.starts_with(
"false"))
2883 else if (
Name.starts_with(
"true"))
2890 }
else if (
Name.starts_with(
"xop.vpcmov")) {
2895 Rep = Builder.
CreateOr(Sel0, Sel1);
2896 }
else if (
Name.starts_with(
"xop.vprot") ||
Name.starts_with(
"avx512.prol") ||
2897 Name.starts_with(
"avx512.mask.prol")) {
2899 }
else if (
Name.starts_with(
"avx512.pror") ||
2900 Name.starts_with(
"avx512.mask.pror")) {
2902 }
else if (
Name.starts_with(
"avx512.vpshld.") ||
2903 Name.starts_with(
"avx512.mask.vpshld") ||
2904 Name.starts_with(
"avx512.maskz.vpshld")) {
2905 bool ZeroMask =
Name[11] ==
'z';
2907 }
else if (
Name.starts_with(
"avx512.vpshrd.") ||
2908 Name.starts_with(
"avx512.mask.vpshrd") ||
2909 Name.starts_with(
"avx512.maskz.vpshrd")) {
2910 bool ZeroMask =
Name[11] ==
'z';
2912 }
else if (
Name ==
"sse42.crc32.64.8") {
2918 }
else if (
Name.starts_with(
"avx.vbroadcast.s") ||
2919 Name.starts_with(
"avx512.vbroadcast.s")) {
2921 auto *VecTy = cast<FixedVectorType>(CI->
getType());
2922 Type *EltTy = VecTy->getElementType();
2923 unsigned EltNum = VecTy->getNumElements();
2927 for (
unsigned I = 0;
I < EltNum; ++
I)
2929 }
else if (
Name.starts_with(
"sse41.pmovsx") ||
2930 Name.starts_with(
"sse41.pmovzx") ||
2931 Name.starts_with(
"avx2.pmovsx") ||
2932 Name.starts_with(
"avx2.pmovzx") ||
2933 Name.starts_with(
"avx512.mask.pmovsx") ||
2934 Name.starts_with(
"avx512.mask.pmovzx")) {
2935 auto *DstTy = cast<FixedVectorType>(CI->
getType());
2936 unsigned NumDstElts = DstTy->getNumElements();
2940 for (
unsigned i = 0; i != NumDstElts; ++i)
2945 bool DoSext =
Name.contains(
"pmovsx");
2952 }
else if (
Name ==
"avx512.mask.pmov.qd.256" ||
2953 Name ==
"avx512.mask.pmov.qd.512" ||
2954 Name ==
"avx512.mask.pmov.wb.256" ||
2955 Name ==
"avx512.mask.pmov.wb.512") {
2960 }
else if (
Name.starts_with(
"avx.vbroadcastf128") ||
2961 Name ==
"avx2.vbroadcasti128") {
2963 Type *EltTy = cast<VectorType>(CI->
getType())->getElementType();
2967 PointerType::getUnqual(VT));
2969 if (NumSrcElts == 2)
2974 }
else if (
Name.starts_with(
"avx512.mask.shuf.i") ||
2975 Name.starts_with(
"avx512.mask.shuf.f")) {
2976 unsigned Imm = cast<ConstantInt>(CI->
getArgOperand(2))->getZExtValue();
2980 unsigned ControlBitsMask = NumLanes - 1;
2981 unsigned NumControlBits = NumLanes / 2;
2984 for (
unsigned l = 0; l != NumLanes; ++l) {
2985 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2987 if (l >= NumLanes / 2)
2988 LaneMask += NumLanes;
2989 for (
unsigned i = 0; i != NumElementsInLane; ++i)
2990 ShuffleMask.
push_back(LaneMask * NumElementsInLane + i);
2996 }
else if (
Name.starts_with(
"avx512.mask.broadcastf") ||
2997 Name.starts_with(
"avx512.mask.broadcasti")) {
3000 unsigned NumDstElts =
3001 cast<FixedVectorType>(CI->
getType())->getNumElements();
3004 for (
unsigned i = 0; i != NumDstElts; ++i)
3005 ShuffleMask[i] = i % NumSrcElts;
3011 }
else if (
Name.starts_with(
"avx2.pbroadcast") ||
3012 Name.starts_with(
"avx2.vbroadcast") ||
3013 Name.starts_with(
"avx512.pbroadcast") ||
3014 Name.starts_with(
"avx512.mask.broadcast.s")) {
3026 }
else if (
Name.starts_with(
"sse2.padds.") ||
3027 Name.starts_with(
"avx2.padds.") ||
3028 Name.starts_with(
"avx512.padds.") ||
3029 Name.starts_with(
"avx512.mask.padds.")) {
3031 }
else if (
Name.starts_with(
"sse2.psubs.") ||
3032 Name.starts_with(
"avx2.psubs.") ||
3033 Name.starts_with(
"avx512.psubs.") ||
3034 Name.starts_with(
"avx512.mask.psubs.")) {
3036 }
else if (
Name.starts_with(
"sse2.paddus.") ||
3037 Name.starts_with(
"avx2.paddus.") ||
3038 Name.starts_with(
"avx512.mask.paddus.")) {
3040 }
else if (
Name.starts_with(
"sse2.psubus.") ||
3041 Name.starts_with(
"avx2.psubus.") ||
3042 Name.starts_with(
"avx512.mask.psubus.")) {
3044 }
else if (
Name.starts_with(
"avx512.mask.palignr.")) {
3049 }
else if (
Name.starts_with(
"avx512.mask.valign.")) {
3053 }
else if (
Name ==
"sse2.psll.dq" ||
Name ==
"avx2.psll.dq") {
3055 unsigned Shift = cast<ConstantInt>(CI->
getArgOperand(1))->getZExtValue();
3058 }
else if (
Name ==
"sse2.psrl.dq" ||
Name ==
"avx2.psrl.dq") {
3060 unsigned Shift = cast<ConstantInt>(CI->
getArgOperand(1))->getZExtValue();
3063 }
else if (
Name ==
"sse2.psll.dq.bs" ||
Name ==
"avx2.psll.dq.bs" ||
3064 Name ==
"avx512.psll.dq.512") {
3066 unsigned Shift = cast<ConstantInt>(CI->
getArgOperand(1))->getZExtValue();
3068 }
else if (
Name ==
"sse2.psrl.dq.bs" ||
Name ==
"avx2.psrl.dq.bs" ||
3069 Name ==
"avx512.psrl.dq.512") {
3071 unsigned Shift = cast<ConstantInt>(CI->
getArgOperand(1))->getZExtValue();
3073 }
else if (
Name ==
"sse41.pblendw" ||
Name.starts_with(
"sse41.blendp") ||
3074 Name.starts_with(
"avx.blend.p") ||
Name ==
"avx2.pblendw" ||
3075 Name.starts_with(
"avx2.pblendd.")) {
3078 unsigned Imm = cast<ConstantInt>(CI->
getArgOperand(2))->getZExtValue();
3079 auto *VecTy = cast<FixedVectorType>(CI->
getType());
3080 unsigned NumElts = VecTy->getNumElements();
3083 for (
unsigned i = 0; i != NumElts; ++i)
3084 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3087 }
else if (
Name.starts_with(
"avx.vinsertf128.") ||
3088 Name ==
"avx2.vinserti128" ||
3089 Name.starts_with(
"avx512.mask.insert")) {
3092 unsigned Imm = cast<ConstantInt>(CI->
getArgOperand(2))->getZExtValue();
3093 unsigned DstNumElts =
3094 cast<FixedVectorType>(CI->
getType())->getNumElements();
3095 unsigned SrcNumElts =
3096 cast<FixedVectorType>(Op1->
getType())->getNumElements();
3097 unsigned Scale = DstNumElts / SrcNumElts;
3104 for (
unsigned i = 0; i != SrcNumElts; ++i)
3106 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3107 Idxs[i] = SrcNumElts;
3122 for (
unsigned i = 0; i != DstNumElts; ++i)
3125 for (
unsigned i = 0; i != SrcNumElts; ++i)
3126 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3133 }
else if (
Name.starts_with(
"avx.vextractf128.") ||
3134 Name ==
"avx2.vextracti128" ||
3135 Name.starts_with(
"avx512.mask.vextract")) {
3137 unsigned Imm = cast<ConstantInt>(CI->
getArgOperand(1))->getZExtValue();
3138 unsigned DstNumElts =
3139 cast<FixedVectorType>(CI->
getType())->getNumElements();
3140 unsigned SrcNumElts =
3141 cast<FixedVectorType>(Op0->
getType())->getNumElements();
3142 unsigned Scale = SrcNumElts / DstNumElts;
3149 for (
unsigned i = 0; i != DstNumElts; ++i) {
3150 Idxs[i] = i + (Imm * DstNumElts);
3158 }
else if (
Name.starts_with(
"avx512.mask.perm.df.") ||
3159 Name.starts_with(
"avx512.mask.perm.di.")) {
3161 unsigned Imm = cast<ConstantInt>(CI->
getArgOperand(1))->getZExtValue();
3162 auto *VecTy = cast<FixedVectorType>(CI->
getType());
3163 unsigned NumElts = VecTy->getNumElements();
3166 for (
unsigned i = 0; i != NumElts; ++i)
3167 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3174 }
else if (
Name.starts_with(
"avx.vperm2f128.") ||
Name ==
"avx2.vperm2i128") {
3185 unsigned NumElts = cast<FixedVectorType>(CI->
getType())->getNumElements();
3186 unsigned HalfSize = NumElts / 2;
3198 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3199 for (
unsigned i = 0; i < HalfSize; ++i)
3200 ShuffleMask[i] = StartIndex + i;
3203 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3204 for (
unsigned i = 0; i < HalfSize; ++i)
3205 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3209 }
else if (
Name.starts_with(
"avx.vpermil.") ||
Name ==
"sse2.pshuf.d" ||
3210 Name.starts_with(
"avx512.mask.vpermil.p") ||
3211 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3213 unsigned Imm = cast<ConstantInt>(CI->
getArgOperand(1))->getZExtValue();
3214 auto *VecTy = cast<FixedVectorType>(CI->
getType());
3215 unsigned NumElts = VecTy->getNumElements();
3217 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3218 unsigned IdxMask = ((1 << IdxSize) - 1);
3224 for (
unsigned i = 0; i != NumElts; ++i)
3225 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3232 }
else if (
Name ==
"sse2.pshufl.w" ||
3233 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3235 unsigned Imm = cast<ConstantInt>(CI->
getArgOperand(1))->getZExtValue();
3236 unsigned NumElts = cast<FixedVectorType>(CI->
getType())->getNumElements();
3239 for (
unsigned l = 0; l != NumElts; l += 8) {
3240 for (
unsigned i = 0; i != 4; ++i)
3241 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3242 for (
unsigned i = 4; i != 8; ++i)
3243 Idxs[i + l] = i + l;
3251 }
else if (
Name ==
"sse2.pshufh.w" ||
3252 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3254 unsigned Imm = cast<ConstantInt>(CI->
getArgOperand(1))->getZExtValue();
3255 unsigned NumElts = cast<FixedVectorType>(CI->
getType())->getNumElements();
3258 for (
unsigned l = 0; l != NumElts; l += 8) {
3259 for (
unsigned i = 0; i != 4; ++i)
3260 Idxs[i + l] = i + l;
3261 for (
unsigned i = 0; i != 4; ++i)
3262 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3270 }
else if (
Name.starts_with(
"avx512.mask.shuf.p")) {
3273 unsigned Imm = cast<ConstantInt>(CI->
getArgOperand(2))->getZExtValue();
3274 unsigned NumElts = cast<FixedVectorType>(CI->
getType())->getNumElements();
3277 unsigned HalfLaneElts = NumLaneElts / 2;
3280 for (
unsigned i = 0; i != NumElts; ++i) {
3282 Idxs[i] = i - (i % NumLaneElts);
3284 if ((i % NumLaneElts) >= HalfLaneElts)
3288 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3295 }
else if (
Name.starts_with(
"avx512.mask.movddup") ||
3296 Name.starts_with(
"avx512.mask.movshdup") ||
3297 Name.starts_with(
"avx512.mask.movsldup")) {
3299 unsigned NumElts = cast<FixedVectorType>(CI->
getType())->getNumElements();
3303 if (
Name.starts_with(
"avx512.mask.movshdup."))
3307 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3308 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3309 Idxs[i + l + 0] = i + l +
Offset;
3310 Idxs[i + l + 1] = i + l +
Offset;
3317 }
else if (
Name.starts_with(
"avx512.mask.punpckl") ||
3318 Name.starts_with(
"avx512.mask.unpckl.")) {
3321 int NumElts = cast<FixedVectorType>(CI->
getType())->getNumElements();
3325 for (
int l = 0; l != NumElts; l += NumLaneElts)
3326 for (
int i = 0; i != NumLaneElts; ++i)
3327 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3333 }
else if (
Name.starts_with(
"avx512.mask.punpckh") ||
3334 Name.starts_with(
"avx512.mask.unpckh.")) {
3337 int NumElts = cast<FixedVectorType>(CI->
getType())->getNumElements();
3341 for (
int l = 0; l != NumElts; l += NumLaneElts)
3342 for (
int i = 0; i != NumLaneElts; ++i)
3343 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3349 }
else if (
Name.starts_with(
"avx512.mask.and.") ||
3350 Name.starts_with(
"avx512.mask.pand.")) {
3352 VectorType *ITy = VectorType::getInteger(FTy);
3358 }
else if (
Name.starts_with(
"avx512.mask.andn.") ||
3359 Name.starts_with(
"avx512.mask.pandn.")) {
3361 VectorType *ITy = VectorType::getInteger(FTy);
3368 }
else if (
Name.starts_with(
"avx512.mask.or.") ||
3369 Name.starts_with(
"avx512.mask.por.")) {
3371 VectorType *ITy = VectorType::getInteger(FTy);
3377 }
else if (
Name.starts_with(
"avx512.mask.xor.") ||
3378 Name.starts_with(
"avx512.mask.pxor.")) {
3380 VectorType *ITy = VectorType::getInteger(FTy);
3386 }
else if (
Name.starts_with(
"avx512.mask.padd.")) {
3390 }
else if (
Name.starts_with(
"avx512.mask.psub.")) {
3394 }
else if (
Name.starts_with(
"avx512.mask.pmull.")) {
3398 }
else if (
Name.starts_with(
"avx512.mask.add.p")) {
3399 if (
Name.ends_with(
".512")) {
3401 if (
Name[17] ==
's')
3402 IID = Intrinsic::x86_avx512_add_ps_512;
3404 IID = Intrinsic::x86_avx512_add_pd_512;
3414 }
else if (
Name.starts_with(
"avx512.mask.div.p")) {
3415 if (
Name.ends_with(
".512")) {
3417 if (
Name[17] ==
's')
3418 IID = Intrinsic::x86_avx512_div_ps_512;
3420 IID = Intrinsic::x86_avx512_div_pd_512;
3430 }
else if (
Name.starts_with(
"avx512.mask.mul.p")) {
3431 if (
Name.ends_with(
".512")) {
3433 if (
Name[17] ==
's')
3434 IID = Intrinsic::x86_avx512_mul_ps_512;
3436 IID = Intrinsic::x86_avx512_mul_pd_512;
3446 }
else if (
Name.starts_with(
"avx512.mask.sub.p")) {
3447 if (
Name.ends_with(
".512")) {
3449 if (
Name[17] ==
's')
3450 IID = Intrinsic::x86_avx512_sub_ps_512;
3452 IID = Intrinsic::x86_avx512_sub_pd_512;
3462 }
else if ((
Name.starts_with(
"avx512.mask.max.p") ||
3463 Name.starts_with(
"avx512.mask.min.p")) &&
3464 Name.drop_front(18) ==
".512") {
3465 bool IsDouble =
Name[17] ==
'd';
3466 bool IsMin =
Name[13] ==
'i';
3468 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3469 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3477 }
else if (
Name.starts_with(
"avx512.mask.lzcnt.")) {
3480 {CI->getArgOperand(0), Builder.getInt1(false)});
3483 }
else if (
Name.starts_with(
"avx512.mask.psll")) {
3484 bool IsImmediate =
Name[16] ==
'i' || (
Name.size() > 18 &&
Name[18] ==
'i');
3485 bool IsVariable =
Name[16] ==
'v';
3492 if (IsVariable &&
Name[17] !=
'.') {
3493 if (
Size ==
'd' &&
Name[17] ==
'2')
3494 IID = Intrinsic::x86_avx2_psllv_q;
3495 else if (
Size ==
'd' &&
Name[17] ==
'4')
3496 IID = Intrinsic::x86_avx2_psllv_q_256;
3497 else if (
Size ==
's' &&
Name[17] ==
'4')
3498 IID = Intrinsic::x86_avx2_psllv_d;
3499 else if (
Size ==
's' &&
Name[17] ==
'8')
3500 IID = Intrinsic::x86_avx2_psllv_d_256;
3501 else if (
Size ==
'h' &&
Name[17] ==
'8')
3502 IID = Intrinsic::x86_avx512_psllv_w_128;
3503 else if (
Size ==
'h' &&
Name[17] ==
'1')
3504 IID = Intrinsic::x86_avx512_psllv_w_256;
3505 else if (
Name[17] ==
'3' &&
Name[18] ==
'2')
3506 IID = Intrinsic::x86_avx512_psllv_w_512;
3509 }
else if (
Name.ends_with(
".128")) {
3511 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3512 : Intrinsic::x86_sse2_psll_d;
3513 else if (
Size ==
'q')
3514 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3515 : Intrinsic::x86_sse2_psll_q;
3516 else if (
Size ==
'w')
3517 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3518 : Intrinsic::x86_sse2_psll_w;
3521 }
else if (
Name.ends_with(
".256")) {
3523 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3524 : Intrinsic::x86_avx2_psll_d;
3525 else if (
Size ==
'q')
3526 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3527 : Intrinsic::x86_avx2_psll_q;
3528 else if (
Size ==
'w')
3529 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3530 : Intrinsic::x86_avx2_psll_w;
3535 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3536 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3537 : Intrinsic::x86_avx512_psll_d_512;
3538 else if (
Size ==
'q')
3539 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3540 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3541 : Intrinsic::x86_avx512_psll_q_512;
3542 else if (
Size ==
'w')
3543 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3544 : Intrinsic::x86_avx512_psll_w_512;
3550 }
else if (
Name.starts_with(
"avx512.mask.psrl")) {
3551 bool IsImmediate =
Name[16] ==
'i' || (
Name.size() > 18 &&
Name[18] ==
'i');
3552 bool IsVariable =
Name[16] ==
'v';
3559 if (IsVariable &&
Name[17] !=
'.') {
3560 if (
Size ==
'd' &&
Name[17] ==
'2')
3561 IID = Intrinsic::x86_avx2_psrlv_q;
3562 else if (
Size ==
'd' &&
Name[17] ==
'4')
3563 IID = Intrinsic::x86_avx2_psrlv_q_256;
3564 else if (
Size ==
's' &&
Name[17] ==
'4')
3565 IID = Intrinsic::x86_avx2_psrlv_d;
3566 else if (
Size ==
's' &&
Name[17] ==
'8')
3567 IID = Intrinsic::x86_avx2_psrlv_d_256;
3568 else if (
Size ==
'h' &&
Name[17] ==
'8')
3569 IID = Intrinsic::x86_avx512_psrlv_w_128;
3570 else if (
Size ==
'h' &&
Name[17] ==
'1')
3571 IID = Intrinsic::x86_avx512_psrlv_w_256;
3572 else if (
Name[17] ==
'3' &&
Name[18] ==
'2')
3573 IID = Intrinsic::x86_avx512_psrlv_w_512;
3576 }
else if (
Name.ends_with(
".128")) {
3578 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3579 : Intrinsic::x86_sse2_psrl_d;
3580 else if (
Size ==
'q')
3581 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3582 : Intrinsic::x86_sse2_psrl_q;
3583 else if (
Size ==
'w')
3584 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3585 : Intrinsic::x86_sse2_psrl_w;
3588 }
else if (
Name.ends_with(
".256")) {
3590 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3591 : Intrinsic::x86_avx2_psrl_d;
3592 else if (
Size ==
'q')
3593 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3594 : Intrinsic::x86_avx2_psrl_q;
3595 else if (
Size ==
'w')
3596 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3597 : Intrinsic::x86_avx2_psrl_w;
3602 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3603 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3604 : Intrinsic::x86_avx512_psrl_d_512;
3605 else if (
Size ==
'q')
3606 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3607 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3608 : Intrinsic::x86_avx512_psrl_q_512;
3609 else if (
Size ==
'w')
3610 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3611 : Intrinsic::x86_avx512_psrl_w_512;
3617 }
else if (
Name.starts_with(
"avx512.mask.psra")) {
3618 bool IsImmediate =
Name[16] ==
'i' || (
Name.size() > 18 &&
Name[18] ==
'i');
3619 bool IsVariable =
Name[16] ==
'v';
3626 if (IsVariable &&
Name[17] !=
'.') {
3627 if (
Size ==
's' &&
Name[17] ==
'4')
3628 IID = Intrinsic::x86_avx2_psrav_d;
3629 else if (
Size ==
's' &&
Name[17] ==
'8')
3630 IID = Intrinsic::x86_avx2_psrav_d_256;
3631 else if (
Size ==
'h' &&
Name[17] ==
'8')
3632 IID = Intrinsic::x86_avx512_psrav_w_128;
3633 else if (
Size ==
'h' &&
Name[17] ==
'1')
3634 IID = Intrinsic::x86_avx512_psrav_w_256;
3635 else if (
Name[17] ==
'3' &&
Name[18] ==
'2')
3636 IID = Intrinsic::x86_avx512_psrav_w_512;
3639 }
else if (
Name.ends_with(
".128")) {
3641 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3642 : Intrinsic::x86_sse2_psra_d;
3643 else if (
Size ==
'q')
3644 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3645 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3646 : Intrinsic::x86_avx512_psra_q_128;
3647 else if (
Size ==
'w')
3648 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3649 : Intrinsic::x86_sse2_psra_w;
3652 }
else if (
Name.ends_with(
".256")) {
3654 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3655 : Intrinsic::x86_avx2_psra_d;
3656 else if (
Size ==
'q')
3657 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3658 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3659 : Intrinsic::x86_avx512_psra_q_256;
3660 else if (
Size ==
'w')
3661 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3662 : Intrinsic::x86_avx2_psra_w;
3667 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3668 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3669 : Intrinsic::x86_avx512_psra_d_512;
3670 else if (
Size ==
'q')
3671 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3672 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3673 : Intrinsic::x86_avx512_psra_q_512;
3674 else if (
Size ==
'w')
3675 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3676 : Intrinsic::x86_avx512_psra_w_512;
3682 }
else if (
Name.starts_with(
"avx512.mask.move.s")) {
3684 }
else if (
Name.starts_with(
"avx512.cvtmask2")) {
3686 }
else if (
Name.ends_with(
".movntdqa")) {
3694 Ptr, PointerType::getUnqual(CI->
getType()),
"cast");
3700 }
else if (
Name.starts_with(
"fma.vfmadd.") ||
3701 Name.starts_with(
"fma.vfmsub.") ||
3702 Name.starts_with(
"fma.vfnmadd.") ||
3703 Name.starts_with(
"fma.vfnmsub.")) {
3704 bool NegMul =
Name[6] ==
'n';
3705 bool NegAcc = NegMul ?
Name[8] ==
's' :
Name[7] ==
's';
3706 bool IsScalar = NegMul ?
Name[12] ==
's' :
Name[11] ==
's';
3717 if (NegMul && !IsScalar)
3719 if (NegMul && IsScalar)
3728 }
else if (
Name.starts_with(
"fma4.vfmadd.s")) {
3740 }
else if (
Name.starts_with(
"avx512.mask.vfmadd.s") ||
3741 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
3742 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
3743 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
3744 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
3745 bool IsMask3 =
Name[11] ==
'3';
3746 bool IsMaskZ =
Name[11] ==
'z';
3748 Name =
Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3749 bool NegMul =
Name[2] ==
'n';
3750 bool NegAcc = NegMul ?
Name[4] ==
's' :
Name[3] ==
's';
3756 if (NegMul && (IsMask3 || IsMaskZ))
3758 if (NegMul && !(IsMask3 || IsMaskZ))
3768 cast<ConstantInt>(CI->
getArgOperand(4))->getZExtValue() != 4) {
3772 if (
Name.back() ==
'd')
3773 IID = Intrinsic::x86_avx512_vfmadd_f64;
3775 IID = Intrinsic::x86_avx512_vfmadd_f32;
3787 if (NegAcc && IsMask3)
3794 }
else if (
Name.starts_with(
"avx512.mask.vfmadd.p") ||
3795 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
3796 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
3797 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
3798 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
3799 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
3800 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
3801 bool IsMask3 =
Name[11] ==
'3';
3802 bool IsMaskZ =
Name[11] ==
'z';
3804 Name =
Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3805 bool NegMul =
Name[2] ==
'n';
3806 bool NegAcc = NegMul ?
Name[4] ==
's' :
Name[3] ==
's';
3812 if (NegMul && (IsMask3 || IsMaskZ))
3814 if (NegMul && !(IsMask3 || IsMaskZ))
3821 cast<ConstantInt>(CI->
getArgOperand(4))->getZExtValue() != 4)) {
3825 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3827 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3839 }
else if (
Name.starts_with(
"fma.vfmsubadd.p")) {
3843 if (VecWidth == 128 && EltWidth == 32)
3844 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3845 else if (VecWidth == 256 && EltWidth == 32)
3846 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3847 else if (VecWidth == 128 && EltWidth == 64)
3848 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3849 else if (VecWidth == 256 && EltWidth == 64)
3850 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3858 }
else if (
Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
3859 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
3860 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
3861 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
3862 bool IsMask3 =
Name[11] ==
'3';
3863 bool IsMaskZ =
Name[11] ==
'z';
3865 Name =
Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3866 bool IsSubAdd =
Name[3] ==
's';
3871 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3873 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3882 int NumElts = cast<FixedVectorType>(CI->
getType())->getNumElements();
3897 for (
int i = 0; i != NumElts; ++i)
3898 Idxs[i] = i + (i % 2) * NumElts;
3908 }
else if (
Name.starts_with(
"avx512.mask.pternlog.") ||
3909 Name.starts_with(
"avx512.maskz.pternlog.")) {
3910 bool ZeroMask =
Name[11] ==
'z';
3914 if (VecWidth == 128 && EltWidth == 32)
3915 IID = Intrinsic::x86_avx512_pternlog_d_128;
3916 else if (VecWidth == 256 && EltWidth == 32)
3917 IID = Intrinsic::x86_avx512_pternlog_d_256;
3918 else if (VecWidth == 512 && EltWidth == 32)
3919 IID = Intrinsic::x86_avx512_pternlog_d_512;
3920 else if (VecWidth == 128 && EltWidth == 64)
3921 IID = Intrinsic::x86_avx512_pternlog_q_128;
3922 else if (VecWidth == 256 && EltWidth == 64)
3923 IID = Intrinsic::x86_avx512_pternlog_q_256;
3924 else if (VecWidth == 512 && EltWidth == 64)
3925 IID = Intrinsic::x86_avx512_pternlog_q_512;
3935 }
else if (
Name.starts_with(
"avx512.mask.vpmadd52") ||
3936 Name.starts_with(
"avx512.maskz.vpmadd52")) {
3937 bool ZeroMask =
Name[11] ==
'z';
3941 if (VecWidth == 128 && !
High)
3942 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3943 else if (VecWidth == 256 && !
High)
3944 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3945 else if (VecWidth == 512 && !
High)
3946 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3947 else if (VecWidth == 128 &&
High)
3948 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3949 else if (VecWidth == 256 &&
High)
3950 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3951 else if (VecWidth == 512 &&
High)
3952 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3962 }
else if (
Name.starts_with(
"avx512.mask.vpermi2var.") ||
3963 Name.starts_with(
"avx512.mask.vpermt2var.") ||
3964 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
3965 bool ZeroMask =
Name[11] ==
'z';
3966 bool IndexForm =
Name[17] ==
'i';
3968 }
else if (
Name.starts_with(
"avx512.mask.vpdpbusd.") ||
3969 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
3970 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
3971 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
3972 bool ZeroMask =
Name[11] ==
'z';
3973 bool IsSaturating =
Name[ZeroMask ? 21 : 20] ==
's';
3976 if (VecWidth == 128 && !IsSaturating)
3977 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3978 else if (VecWidth == 256 && !IsSaturating)
3979 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3980 else if (VecWidth == 512 && !IsSaturating)
3981 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3982 else if (VecWidth == 128 && IsSaturating)
3983 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3984 else if (VecWidth == 256 && IsSaturating)
3985 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3986 else if (VecWidth == 512 && IsSaturating)
3987 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3997 }
else if (
Name.starts_with(
"avx512.mask.vpdpwssd.") ||
3998 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
3999 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4000 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4001 bool ZeroMask =
Name[11] ==
'z';
4002 bool IsSaturating =
Name[ZeroMask ? 21 : 20] ==
's';
4005 if (VecWidth == 128 && !IsSaturating)
4006 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4007 else if (VecWidth == 256 && !IsSaturating)
4008 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4009 else if (VecWidth == 512 && !IsSaturating)
4010 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4011 else if (VecWidth == 128 && IsSaturating)
4012 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4013 else if (VecWidth == 256 && IsSaturating)
4014 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4015 else if (VecWidth == 512 && IsSaturating)
4016 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4026 }
else if (
Name ==
"addcarryx.u32" ||
Name ==
"addcarryx.u64" ||
4027 Name ==
"addcarry.u32" ||
Name ==
"addcarry.u64" ||
4028 Name ==
"subborrow.u32" ||
Name ==
"subborrow.u64") {
4030 if (
Name[0] ==
'a' &&
Name.back() ==
'2')
4031 IID = Intrinsic::x86_addcarry_32;
4032 else if (
Name[0] ==
'a' &&
Name.back() ==
'4')
4033 IID = Intrinsic::x86_addcarry_64;
4034 else if (
Name[0] ==
's' &&
Name.back() ==
'2')
4035 IID = Intrinsic::x86_subborrow_32;
4036 else if (
Name[0] ==
's' &&
Name.back() ==
'4')
4037 IID = Intrinsic::x86_subborrow_64;
4057 }
else if (
Name.starts_with(
"avx512.mask.") &&
4069 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4070 .
Case(
"sve.fcvtnt.bf16f32", Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4082 if (Args[1]->
getType() != BadPredTy)
4085 Args[1] = Builder.
CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4086 BadPredTy, Args[1]);
4087 Args[1] = Builder.
CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
4088 GoodPredTy, Args[1]);
4096 if (
Name ==
"mve.vctp64.old") {
4103 Intrinsic::arm_mve_pred_v2i,
4104 {VectorType::get(Builder.
getInt1Ty(), 2,
false)}, VCTP);
4106 Intrinsic::arm_mve_pred_i2v,
4107 {VectorType::get(Builder.
getInt1Ty(), 4,
false)}, C1);
4108 }
else if (
Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4109 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4110 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4111 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4113 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4114 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4115 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4116 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4118 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4119 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4120 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4121 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4122 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||