AutoUpgrade.cpp
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/ADT/Triple.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/InstVisitor.h"
24 #include "llvm/IR/Instruction.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/Intrinsics.h"
27 #include "llvm/IR/IntrinsicsAArch64.h"
28 #include "llvm/IR/IntrinsicsARM.h"
29 #include "llvm/IR/IntrinsicsX86.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Verifier.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/Regex.h"
35 #include <cstring>
36 using namespace llvm;
37 
38 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
39 
40 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
41 // changed their type from v4f32 to v2i64.
42 static bool UpgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
43  Function *&NewFn) {
44  // Check whether this is an old version of the function, which received
45  // v4f32 arguments.
46  Type *Arg0Type = F->getFunctionType()->getParamType(0);
47  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
48  return false;
49 
50  // Yes, it's old, replace it with new version.
51  rename(F);
52  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
53  return true;
54 }
55 
56 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
57 // arguments have changed their type from i32 to i8.
58 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
59  Function *&NewFn) {
60  // Check that the last argument is an i32.
61  Type *LastArgType = F->getFunctionType()->getParamType(
62  F->getFunctionType()->getNumParams() - 1);
63  if (!LastArgType->isIntegerTy(32))
64  return false;
65 
66  // Move this function aside and map down.
67  rename(F);
68  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
69  return true;
70 }
71 
72 // Upgrade the declaration of fp compare intrinsics that change return type
73 // from scalar to vXi1 mask.
74 static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
75  Function *&NewFn) {
76  // Check if the return type is a vector.
77  if (F->getReturnType()->isVectorTy())
78  return false;
79 
80  rename(F);
81  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
82  return true;
83 }
84 
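// Upgrade the declarations of AVX512 bf16 conversion intrinsics whose return
// type changed from a vXi16 vector to a vXbf16 vector.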
85 static bool UpgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
86  Function *&NewFn) {
87  if (F->getReturnType()->getScalarType()->isBFloatTy())
88  return false;
89 
90  rename(F);
91  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
92  return true;
93 }
94 
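// Upgrade the declarations of AVX512 bf16 dot product intrinsics whose input
// operand types changed from integer vectors to vXbf16 vectors.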
95 static bool UpgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
96  Function *&NewFn) {
97  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
98  return false;
99 
100  rename(F);
101  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
102  return true;
103 }
104 
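// Returns true for X86 intrinsics (name given without the "llvm.x86." prefix)
// that are upgraded by rewriting their call sites in UpgradeIntrinsicCall
// rather than by mapping the declaration to a replacement intrinsic.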
105 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
106  // All of the intrinsic matches below should be marked with which LLVM
107  // version started autoupgrading them. At some point in the future we would
108  // like to use this information to remove upgrade code for some older
109  // intrinsics. It is currently undecided how we will determine that future
110  // point.
111  if (Name == "addcarryx.u32" || // Added in 8.0
112  Name == "addcarryx.u64" || // Added in 8.0
113  Name == "addcarry.u32" || // Added in 8.0
114  Name == "addcarry.u64" || // Added in 8.0
115  Name == "subborrow.u32" || // Added in 8.0
116  Name == "subborrow.u64" || // Added in 8.0
117  Name.startswith("sse2.padds.") || // Added in 8.0
118  Name.startswith("sse2.psubs.") || // Added in 8.0
119  Name.startswith("sse2.paddus.") || // Added in 8.0
120  Name.startswith("sse2.psubus.") || // Added in 8.0
121  Name.startswith("avx2.padds.") || // Added in 8.0
122  Name.startswith("avx2.psubs.") || // Added in 8.0
123  Name.startswith("avx2.paddus.") || // Added in 8.0
124  Name.startswith("avx2.psubus.") || // Added in 8.0
125  Name.startswith("avx512.padds.") || // Added in 8.0
126  Name.startswith("avx512.psubs.") || // Added in 8.0
127  Name.startswith("avx512.mask.padds.") || // Added in 8.0
128  Name.startswith("avx512.mask.psubs.") || // Added in 8.0
129  Name.startswith("avx512.mask.paddus.") || // Added in 8.0
130  Name.startswith("avx512.mask.psubus.") || // Added in 8.0
131  Name=="ssse3.pabs.b.128" || // Added in 6.0
132  Name=="ssse3.pabs.w.128" || // Added in 6.0
133  Name=="ssse3.pabs.d.128" || // Added in 6.0
134  Name.startswith("fma4.vfmadd.s") || // Added in 7.0
135  Name.startswith("fma.vfmadd.") || // Added in 7.0
136  Name.startswith("fma.vfmsub.") || // Added in 7.0
137  Name.startswith("fma.vfmsubadd.") || // Added in 7.0
138  Name.startswith("fma.vfnmadd.") || // Added in 7.0
139  Name.startswith("fma.vfnmsub.") || // Added in 7.0
140  Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
141  Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
142  Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
143  Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
144  Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
145  Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
146  Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
147  Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
148  Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
149  Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
150  Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
151  Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
152  Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
153  Name.startswith("avx512.kunpck") || //added in 6.0
154  Name.startswith("avx2.pabs.") || // Added in 6.0
155  Name.startswith("avx512.mask.pabs.") || // Added in 6.0
156  Name.startswith("avx512.broadcastm") || // Added in 6.0
157  Name == "sse.sqrt.ss" || // Added in 7.0
158  Name == "sse2.sqrt.sd" || // Added in 7.0
159  Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
160  Name.startswith("avx.sqrt.p") || // Added in 7.0
161  Name.startswith("sse2.sqrt.p") || // Added in 7.0
162  Name.startswith("sse.sqrt.p") || // Added in 7.0
163  Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
164  Name.startswith("sse2.pcmpeq.") || // Added in 3.1
165  Name.startswith("sse2.pcmpgt.") || // Added in 3.1
166  Name.startswith("avx2.pcmpeq.") || // Added in 3.1
167  Name.startswith("avx2.pcmpgt.") || // Added in 3.1
168  Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
169  Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
170  Name.startswith("avx.vperm2f128.") || // Added in 6.0
171  Name == "avx2.vperm2i128" || // Added in 6.0
172  Name == "sse.add.ss" || // Added in 4.0
173  Name == "sse2.add.sd" || // Added in 4.0
174  Name == "sse.sub.ss" || // Added in 4.0
175  Name == "sse2.sub.sd" || // Added in 4.0
176  Name == "sse.mul.ss" || // Added in 4.0
177  Name == "sse2.mul.sd" || // Added in 4.0
178  Name == "sse.div.ss" || // Added in 4.0
179  Name == "sse2.div.sd" || // Added in 4.0
180  Name == "sse41.pmaxsb" || // Added in 3.9
181  Name == "sse2.pmaxs.w" || // Added in 3.9
182  Name == "sse41.pmaxsd" || // Added in 3.9
183  Name == "sse2.pmaxu.b" || // Added in 3.9
184  Name == "sse41.pmaxuw" || // Added in 3.9
185  Name == "sse41.pmaxud" || // Added in 3.9
186  Name == "sse41.pminsb" || // Added in 3.9
187  Name == "sse2.pmins.w" || // Added in 3.9
188  Name == "sse41.pminsd" || // Added in 3.9
189  Name == "sse2.pminu.b" || // Added in 3.9
190  Name == "sse41.pminuw" || // Added in 3.9
191  Name == "sse41.pminud" || // Added in 3.9
192  Name == "avx512.kand.w" || // Added in 7.0
193  Name == "avx512.kandn.w" || // Added in 7.0
194  Name == "avx512.knot.w" || // Added in 7.0
195  Name == "avx512.kor.w" || // Added in 7.0
196  Name == "avx512.kxor.w" || // Added in 7.0
197  Name == "avx512.kxnor.w" || // Added in 7.0
198  Name == "avx512.kortestc.w" || // Added in 7.0
199  Name == "avx512.kortestz.w" || // Added in 7.0
200  Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
201  Name.startswith("avx2.pmax") || // Added in 3.9
202  Name.startswith("avx2.pmin") || // Added in 3.9
203  Name.startswith("avx512.mask.pmax") || // Added in 4.0
204  Name.startswith("avx512.mask.pmin") || // Added in 4.0
205  Name.startswith("avx2.vbroadcast") || // Added in 3.8
206  Name.startswith("avx2.pbroadcast") || // Added in 3.8
207  Name.startswith("avx.vpermil.") || // Added in 3.1
208  Name.startswith("sse2.pshuf") || // Added in 3.9
209  Name.startswith("avx512.pbroadcast") || // Added in 3.9
210  Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
211  Name.startswith("avx512.mask.movddup") || // Added in 3.9
212  Name.startswith("avx512.mask.movshdup") || // Added in 3.9
213  Name.startswith("avx512.mask.movsldup") || // Added in 3.9
214  Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
215  Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
216  Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
217  Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
218  Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
219  Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
220  Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
221  Name.startswith("avx512.mask.punpckl") || // Added in 3.9
222  Name.startswith("avx512.mask.punpckh") || // Added in 3.9
223  Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
224  Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
225  Name.startswith("avx512.mask.pand.") || // Added in 3.9
226  Name.startswith("avx512.mask.pandn.") || // Added in 3.9
227  Name.startswith("avx512.mask.por.") || // Added in 3.9
228  Name.startswith("avx512.mask.pxor.") || // Added in 3.9
229  Name.startswith("avx512.mask.and.") || // Added in 3.9
230  Name.startswith("avx512.mask.andn.") || // Added in 3.9
231  Name.startswith("avx512.mask.or.") || // Added in 3.9
232  Name.startswith("avx512.mask.xor.") || // Added in 3.9
233  Name.startswith("avx512.mask.padd.") || // Added in 4.0
234  Name.startswith("avx512.mask.psub.") || // Added in 4.0
235  Name.startswith("avx512.mask.pmull.") || // Added in 4.0
236  Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
237  Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
238  Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
239  Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
240  Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
241  Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
242  Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
243  Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
244  Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
245  Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
246  Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
247  Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
248  Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
249  Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
250  Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
251  Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
252  Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
253  Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
254  Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
255  Name == "avx512.cvtusi2sd" || // Added in 7.0
256  Name.startswith("avx512.mask.permvar.") || // Added in 7.0
257  Name == "sse2.pmulu.dq" || // Added in 7.0
258  Name == "sse41.pmuldq" || // Added in 7.0
259  Name == "avx2.pmulu.dq" || // Added in 7.0
260  Name == "avx2.pmul.dq" || // Added in 7.0
261  Name == "avx512.pmulu.dq.512" || // Added in 7.0
262  Name == "avx512.pmul.dq.512" || // Added in 7.0
263  Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
264  Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
265  Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
266  Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
267  Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
268  Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
269  Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
270  Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
271  Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
272  Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
273  Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
274  Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
275  Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
276  Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
277  Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
278  Name.startswith("avx512.cmp.p") || // Added in 12.0
279  Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
280  Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
281  Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
282  Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
283  Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
284  Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
285  Name.startswith("avx512.mask.psll.d") || // Added in 4.0
286  Name.startswith("avx512.mask.psll.q") || // Added in 4.0
287  Name.startswith("avx512.mask.psll.w") || // Added in 4.0
288  Name.startswith("avx512.mask.psra.d") || // Added in 4.0
289  Name.startswith("avx512.mask.psra.q") || // Added in 4.0
290  Name.startswith("avx512.mask.psra.w") || // Added in 4.0
291  Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
292  Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
293  Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
294  Name.startswith("avx512.mask.pslli") || // Added in 4.0
295  Name.startswith("avx512.mask.psrai") || // Added in 4.0
296  Name.startswith("avx512.mask.psrli") || // Added in 4.0
297  Name.startswith("avx512.mask.psllv") || // Added in 4.0
298  Name.startswith("avx512.mask.psrav") || // Added in 4.0
299  Name.startswith("avx512.mask.psrlv") || // Added in 4.0
300  Name.startswith("sse41.pmovsx") || // Added in 3.8
301  Name.startswith("sse41.pmovzx") || // Added in 3.9
302  Name.startswith("avx2.pmovsx") || // Added in 3.9
303  Name.startswith("avx2.pmovzx") || // Added in 3.9
304  Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
305  Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
306  Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
307  Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
308  Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
309  Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
310  Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
311  Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
312  Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
313  Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
314  Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
315  Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
316  Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
317  Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
318  Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
319  Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
320  Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
321  Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
322  Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
323  Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
324  Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
325  Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
326  Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
327  Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
328  Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
329  Name.startswith("avx512.vpshld.") || // Added in 8.0
330  Name.startswith("avx512.vpshrd.") || // Added in 8.0
331  Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
332  Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
333  Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
334  Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
335  Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
336  Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
337  Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
338  Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
339  Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
340  Name.startswith("avx512.mask.conflict.") || // Added in 9.0
341  Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
342  Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
343  Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
344  Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
345  Name == "sse.cvtsi2ss" || // Added in 7.0
346  Name == "sse.cvtsi642ss" || // Added in 7.0
347  Name == "sse2.cvtsi2sd" || // Added in 7.0
348  Name == "sse2.cvtsi642sd" || // Added in 7.0
349  Name == "sse2.cvtss2sd" || // Added in 7.0
350  Name == "sse2.cvtdq2pd" || // Added in 3.9
351  Name == "sse2.cvtdq2ps" || // Added in 7.0
352  Name == "sse2.cvtps2pd" || // Added in 3.9
353  Name == "avx.cvtdq2.pd.256" || // Added in 3.9
354  Name == "avx.cvtdq2.ps.256" || // Added in 7.0
355  Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
356  Name.startswith("vcvtph2ps.") || // Added in 11.0
357  Name.startswith("avx.vinsertf128.") || // Added in 3.7
358  Name == "avx2.vinserti128" || // Added in 3.7
359  Name.startswith("avx512.mask.insert") || // Added in 4.0
360  Name.startswith("avx.vextractf128.") || // Added in 3.7
361  Name == "avx2.vextracti128" || // Added in 3.7
362  Name.startswith("avx512.mask.vextract") || // Added in 4.0
363  Name.startswith("sse4a.movnt.") || // Added in 3.9
364  Name.startswith("avx.movnt.") || // Added in 3.2
365  Name.startswith("avx512.storent.") || // Added in 3.9
366  Name == "sse41.movntdqa" || // Added in 5.0
367  Name == "avx2.movntdqa" || // Added in 5.0
368  Name == "avx512.movntdqa" || // Added in 5.0
369  Name == "sse2.storel.dq" || // Added in 3.9
370  Name.startswith("sse.storeu.") || // Added in 3.9
371  Name.startswith("sse2.storeu.") || // Added in 3.9
372  Name.startswith("avx.storeu.") || // Added in 3.9
373  Name.startswith("avx512.mask.storeu.") || // Added in 3.9
374  Name.startswith("avx512.mask.store.p") || // Added in 3.9
375  Name.startswith("avx512.mask.store.b.") || // Added in 3.9
376  Name.startswith("avx512.mask.store.w.") || // Added in 3.9
377  Name.startswith("avx512.mask.store.d.") || // Added in 3.9
378  Name.startswith("avx512.mask.store.q.") || // Added in 3.9
379  Name == "avx512.mask.store.ss" || // Added in 7.0
380  Name.startswith("avx512.mask.loadu.") || // Added in 3.9
381  Name.startswith("avx512.mask.load.") || // Added in 3.9
382  Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
383  Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
384  Name.startswith("avx512.mask.expand.b") || // Added in 9.0
385  Name.startswith("avx512.mask.expand.w") || // Added in 9.0
386  Name.startswith("avx512.mask.expand.d") || // Added in 9.0
387  Name.startswith("avx512.mask.expand.q") || // Added in 9.0
388  Name.startswith("avx512.mask.expand.p") || // Added in 9.0
389  Name.startswith("avx512.mask.compress.b") || // Added in 9.0
390  Name.startswith("avx512.mask.compress.w") || // Added in 9.0
391  Name.startswith("avx512.mask.compress.d") || // Added in 9.0
392  Name.startswith("avx512.mask.compress.q") || // Added in 9.0
393  Name.startswith("avx512.mask.compress.p") || // Added in 9.0
394  Name == "sse42.crc32.64.8" || // Added in 3.4
395  Name.startswith("avx.vbroadcast.s") || // Added in 3.5
396  Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
397  Name.startswith("avx512.mask.palignr.") || // Added in 3.9
398  Name.startswith("avx512.mask.valign.") || // Added in 4.0
399  Name.startswith("sse2.psll.dq") || // Added in 3.7
400  Name.startswith("sse2.psrl.dq") || // Added in 3.7
401  Name.startswith("avx2.psll.dq") || // Added in 3.7
402  Name.startswith("avx2.psrl.dq") || // Added in 3.7
403  Name.startswith("avx512.psll.dq") || // Added in 3.9
404  Name.startswith("avx512.psrl.dq") || // Added in 3.9
405  Name == "sse41.pblendw" || // Added in 3.7
406  Name.startswith("sse41.blendp") || // Added in 3.7
407  Name.startswith("avx.blend.p") || // Added in 3.7
408  Name == "avx2.pblendw" || // Added in 3.7
409  Name.startswith("avx2.pblendd.") || // Added in 3.7
410  Name.startswith("avx.vbroadcastf128") || // Added in 4.0
411  Name == "avx2.vbroadcasti128" || // Added in 3.7
412  Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
413  Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
414  Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
415  Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
416  Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
417  Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
418  Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
419  Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
420  Name == "xop.vpcmov" || // Added in 3.8
421  Name == "xop.vpcmov.256" || // Added in 5.0
422  Name.startswith("avx512.mask.move.s") || // Added in 4.0
423  Name.startswith("avx512.cvtmask2") || // Added in 5.0
424  Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
425  Name.startswith("xop.vprot") || // Added in 8.0
426  Name.startswith("avx512.prol") || // Added in 8.0
427  Name.startswith("avx512.pror") || // Added in 8.0
428  Name.startswith("avx512.mask.prorv.") || // Added in 8.0
429  Name.startswith("avx512.mask.pror.") || // Added in 8.0
430  Name.startswith("avx512.mask.prolv.") || // Added in 8.0
431  Name.startswith("avx512.mask.prol.") || // Added in 8.0
432  Name.startswith("avx512.ptestm") || //Added in 6.0
433  Name.startswith("avx512.ptestnm") || //Added in 6.0
434  Name.startswith("avx512.mask.pavg")) // Added in 6.0
435  return true;
436 
437  return false;
438 }
439 
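// Upgrade a declared "llvm.x86.*" intrinsic. Returns true if an upgrade is
// needed; NewFn is set to the replacement declaration, or left null when the
// call sites themselves must be rewritten instead.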
440 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
441  Function *&NewFn) {
442  // Only handle intrinsics that start with "x86.".
443  if (!Name.startswith("x86."))
444  return false;
445  // Remove "x86." prefix.
446  Name = Name.substr(4);
447 
448  if (ShouldUpgradeX86Intrinsic(F, Name)) {
449  NewFn = nullptr;
450  return true;
451  }
452 
453  if (Name == "rdtscp") { // Added in 8.0
454  // If this intrinsic has 0 operands, it's the new version.
455  if (F->getFunctionType()->getNumParams() == 0)
456  return false;
457 
458  rename(F);
459  NewFn = Intrinsic::getDeclaration(F->getParent(),
460  Intrinsic::x86_rdtscp);
461  return true;
462  }
463 
464  // SSE4.1 ptest functions may have an old signature.
465  if (Name.startswith("sse41.ptest")) { // Added in 3.2
466  if (Name.substr(11) == "c")
467  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
468  if (Name.substr(11) == "z")
469  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
470  if (Name.substr(11) == "nzc")
471  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
472  }
473  // Several blend and other instructions with masks used the wrong number of
474  // bits.
475  if (Name == "sse41.insertps") // Added in 3.6
476  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
477  NewFn);
478  if (Name == "sse41.dppd") // Added in 3.6
479  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
480  NewFn);
481  if (Name == "sse41.dpps") // Added in 3.6
482  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
483  NewFn);
484  if (Name == "sse41.mpsadbw") // Added in 3.6
485  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
486  NewFn);
487  if (Name == "avx.dp.ps.256") // Added in 3.6
488  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
489  NewFn);
490  if (Name == "avx2.mpsadbw") // Added in 3.6
491  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
492  NewFn);
493  if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
494  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
495  NewFn);
496  if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
497  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
498  NewFn);
499  if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
500  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
501  NewFn);
502  if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
503  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
504  NewFn);
505  if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
506  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
507  NewFn);
508  if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
509  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
510  NewFn);
511  if (Name == "avx512bf16.cvtne2ps2bf16.128") // Added in 9.0
512  return UpgradeX86BF16Intrinsic(
513  F, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128, NewFn);
514  if (Name == "avx512bf16.cvtne2ps2bf16.256") // Added in 9.0
515  return UpgradeX86BF16Intrinsic(
516  F, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256, NewFn);
517  if (Name == "avx512bf16.cvtne2ps2bf16.512") // Added in 9.0
518  return UpgradeX86BF16Intrinsic(
519  F, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512, NewFn);
520  if (Name == "avx512bf16.mask.cvtneps2bf16.128") // Added in 9.0
521  return UpgradeX86BF16Intrinsic(
522  F, Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128, NewFn);
523  if (Name == "avx512bf16.cvtneps2bf16.256") // Added in 9.0
524  return UpgradeX86BF16Intrinsic(
525  F, Intrinsic::x86_avx512bf16_cvtneps2bf16_256, NewFn);
526  if (Name == "avx512bf16.cvtneps2bf16.512") // Added in 9.0
527  return UpgradeX86BF16Intrinsic(
528  F, Intrinsic::x86_avx512bf16_cvtneps2bf16_512, NewFn);
529  if (Name == "avx512bf16.dpbf16ps.128") // Added in 9.0
530  return UpgradeX86BF16DPIntrinsic(
531  F, Intrinsic::x86_avx512bf16_dpbf16ps_128, NewFn);
532  if (Name == "avx512bf16.dpbf16ps.256") // Added in 9.0
533  return UpgradeX86BF16DPIntrinsic(
534  F, Intrinsic::x86_avx512bf16_dpbf16ps_256, NewFn);
535  if (Name == "avx512bf16.dpbf16ps.512") // Added in 9.0
536  return UpgradeX86BF16DPIntrinsic(
537  F, Intrinsic::x86_avx512bf16_dpbf16ps_512, NewFn);
538 
539  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
540  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
541  rename(F);
542  NewFn = Intrinsic::getDeclaration(F->getParent(),
543  Intrinsic::x86_xop_vfrcz_ss);
544  return true;
545  }
546  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
547  rename(F);
548  NewFn = Intrinsic::getDeclaration(F->getParent(),
549  Intrinsic::x86_xop_vfrcz_sd);
550  return true;
551  }
552  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
553  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
554  auto Idx = F->getFunctionType()->getParamType(2);
555  if (Idx->isFPOrFPVectorTy()) {
556  rename(F);
557  unsigned IdxSize = Idx->getPrimitiveSizeInBits();
558  unsigned EltSize = Idx->getScalarSizeInBits();
559  Intrinsic::ID Permil2ID;
560  if (EltSize == 64 && IdxSize == 128)
561  Permil2ID = Intrinsic::x86_xop_vpermil2pd;
562  else if (EltSize == 32 && IdxSize == 128)
563  Permil2ID = Intrinsic::x86_xop_vpermil2ps;
564  else if (EltSize == 64 && IdxSize == 256)
565  Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
566  else
567  Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
568  NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
569  return true;
570  }
571  }
572 
573  if (Name == "seh.recoverfp") {
574  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
575  return true;
576  }
577 
578  return false;
579 }
580 
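// Check a single intrinsic declaration for any required upgrade. Returns true
// if the function must be upgraded; NewFn receives the replacement declaration
// when one exists, and stays null when calls are expanded in place instead.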
581 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
582  assert(F && "Illegal to upgrade a non-existent Function.");
583 
584  // Quickly eliminate it, if it's not a candidate.
585  StringRef Name = F->getName();
586  if (Name.size() <= 7 || !Name.startswith("llvm."))
587  return false;
588  Name = Name.substr(5); // Strip off "llvm."
589 
590  switch (Name[0]) {
591  default: break;
592  case 'a': {
593  if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
594  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
595  F->arg_begin()->getType());
596  return true;
597  }
598  if (Name.startswith("aarch64.neon.frintn")) {
599  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
600  F->arg_begin()->getType());
601  return true;
602  }
603  if (Name.startswith("aarch64.neon.rbit")) {
604  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
605  F->arg_begin()->getType());
606  return true;
607  }
608  static const Regex LdRegex("^aarch64\\.sve\\.ld[234](.nxv[a-z0-9]+|$)");
609  if (LdRegex.match(Name)) {
610  Type *ScalarTy =
611  dyn_cast<VectorType>(F->getReturnType())->getElementType();
612  ElementCount EC =
613  dyn_cast<VectorType>(F->arg_begin()->getType())->getElementCount();
614  Type *Ty = VectorType::get(ScalarTy, EC);
615  Intrinsic::ID ID =
616  StringSwitch<Intrinsic::ID>(Name)
617  .StartsWith("aarch64.sve.ld2", Intrinsic::aarch64_sve_ld2_sret)
618  .StartsWith("aarch64.sve.ld3", Intrinsic::aarch64_sve_ld3_sret)
619  .StartsWith("aarch64.sve.ld4", Intrinsic::aarch64_sve_ld4_sret)
620  .Default(Intrinsic::not_intrinsic);
621  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Ty);
622  return true;
623  }
624  if (Name.startswith("aarch64.sve.tuple.get")) {
625  Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
626  NewFn = Intrinsic::getDeclaration(F->getParent(),
627  Intrinsic::vector_extract, Tys);
628  return true;
629  }
630  if (Name.startswith("aarch64.sve.tuple.set")) {
631  auto Args = F->getFunctionType()->params();
632  Type *Tys[] = {Args[0], Args[2], Args[1]};
633  NewFn = Intrinsic::getDeclaration(F->getParent(),
634  Intrinsic::vector_insert, Tys);
635  return true;
636  }
637  static const Regex CreateTupleRegex(
638  "^aarch64\\.sve\\.tuple\\.create[234](.nxv[a-z0-9]+|$)");
639  if (CreateTupleRegex.match(Name)) {
640  auto Args = F->getFunctionType()->params();
641  Type *Tys[] = {F->getReturnType(), Args[1]};
642  NewFn = Intrinsic::getDeclaration(F->getParent(),
643  Intrinsic::vector_insert, Tys);
644  return true;
645  }
646  if (Name.startswith("arm.neon.vclz")) {
647  Type* args[2] = {
648  F->arg_begin()->getType(),
649  Type::getInt1Ty(F->getContext())
650  };
651  // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
652  // the end of the name. Change name from llvm.arm.neon.vclz.* to
653  // llvm.ctlz.*
654  FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
655  NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
656  "llvm.ctlz." + Name.substr(14), F->getParent());
657  return true;
658  }
659  if (Name.startswith("arm.neon.vcnt")) {
660  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
661  F->arg_begin()->getType());
662  return true;
663  }
664  static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
665  if (vstRegex.match(Name)) {
666  static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
667  Intrinsic::arm_neon_vst2,
668  Intrinsic::arm_neon_vst3,
669  Intrinsic::arm_neon_vst4};
670 
671  static const Intrinsic::ID StoreLaneInts[] = {
672  Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
673  Intrinsic::arm_neon_vst4lane
674  };
675 
676  auto fArgs = F->getFunctionType()->params();
677  Type *Tys[] = {fArgs[0], fArgs[1]};
678  if (!Name.contains("lane"))
679  NewFn = Intrinsic::getDeclaration(F->getParent(),
680  StoreInts[fArgs.size() - 3], Tys);
681  else
682  NewFn = Intrinsic::getDeclaration(F->getParent(),
683  StoreLaneInts[fArgs.size() - 5], Tys);
684  return true;
685  }
686  if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
687  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
688  return true;
689  }
690  if (Name.startswith("arm.neon.vqadds.")) {
691  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
692  F->arg_begin()->getType());
693  return true;
694  }
695  if (Name.startswith("arm.neon.vqaddu.")) {
696  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
697  F->arg_begin()->getType());
698  return true;
699  }
700  if (Name.startswith("arm.neon.vqsubs.")) {
701  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
702  F->arg_begin()->getType());
703  return true;
704  }
705  if (Name.startswith("arm.neon.vqsubu.")) {
706  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
707  F->arg_begin()->getType());
708  return true;
709  }
710  if (Name.startswith("aarch64.neon.addp")) {
711  if (F->arg_size() != 2)
712  break; // Invalid IR.
713  VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
714  if (Ty && Ty->getElementType()->isFloatingPointTy()) {
715  NewFn = Intrinsic::getDeclaration(F->getParent(),
716  Intrinsic::aarch64_neon_faddp, Ty);
717  return true;
718  }
719  }
720 
721  // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
722  // respectively
723  if ((Name.startswith("arm.neon.bfdot.") ||
724  Name.startswith("aarch64.neon.bfdot.")) &&
725  Name.endswith("i8")) {
726  Intrinsic::ID IID =
727  StringSwitch<Intrinsic::ID>(Name)
728  .Cases("arm.neon.bfdot.v2f32.v8i8",
729  "arm.neon.bfdot.v4f32.v16i8",
730  Intrinsic::arm_neon_bfdot)
731  .Cases("aarch64.neon.bfdot.v2f32.v8i8",
732  "aarch64.neon.bfdot.v4f32.v16i8",
733  Intrinsic::aarch64_neon_bfdot)
734  .Default(Intrinsic::not_intrinsic);
735  if (IID == Intrinsic::not_intrinsic)
736  break;
737 
738  size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
739  assert((OperandWidth == 64 || OperandWidth == 128) &&
740  "Unexpected operand width");
741  LLVMContext &Ctx = F->getParent()->getContext();
742  std::array<Type *, 2> Tys {{
743  F->getReturnType(),
744  FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
745  }};
746  NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
747  return true;
748  }
749 
750  // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
751  // and accept v8bf16 instead of v16i8
752  if ((Name.startswith("arm.neon.bfm") ||
753  Name.startswith("aarch64.neon.bfm")) &&
754  Name.endswith(".v4f32.v16i8")) {
755  Intrinsic::ID IID =
756  StringSwitch<Intrinsic::ID>(Name)
757  .Case("arm.neon.bfmmla.v4f32.v16i8",
758  Intrinsic::arm_neon_bfmmla)
759  .Case("arm.neon.bfmlalb.v4f32.v16i8",
760  Intrinsic::arm_neon_bfmlalb)
761  .Case("arm.neon.bfmlalt.v4f32.v16i8",
762  Intrinsic::arm_neon_bfmlalt)
763  .Case("aarch64.neon.bfmmla.v4f32.v16i8",
764  Intrinsic::aarch64_neon_bfmmla)
765  .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
766  Intrinsic::aarch64_neon_bfmlalb)
767  .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
768  Intrinsic::aarch64_neon_bfmlalt)
769  .Default(Intrinsic::not_intrinsic);
770  if (IID == Intrinsic::not_intrinsic)
771  break;
772 
773  std::array<Type *, 0> Tys;
774  NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
775  return true;
776  }
777 
778  if (Name == "arm.mve.vctp64" &&
779  cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
780  // A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
781  // function and deal with it below in UpgradeIntrinsicCall.
782  rename(F);
783  return true;
784  }
785  // These too are changed to accept a v2i1 instead of the old v4i1.
786  if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
787  Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
788  Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
789  Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
790  Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
791  Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
792  Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
793  Name == "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
794  Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
795  Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
796  Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
797  Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
798  Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
799  Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
800  return true;
801 
802  if (Name == "amdgcn.alignbit") {
803  // Target specific intrinsic became redundant
804  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
805  {F->getReturnType()});
806  return true;
807  }
808 
809  break;
810  }
811 
812  case 'c': {
813  if (Name.startswith("ctlz.") && F->arg_size() == 1) {
814  rename(F);
815  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
816  F->arg_begin()->getType());
817  return true;
818  }
819  if (Name.startswith("cttz.") && F->arg_size() == 1) {
820  rename(F);
821  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
822  F->arg_begin()->getType());
823  return true;
824  }
825  break;
826  }
827  case 'd': {
828  if (Name == "dbg.value" && F->arg_size() == 4) {
829  rename(F);
830  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
831  return true;
832  }
833  break;
834  }
835  case 'e': {
836  if (Name.startswith("experimental.vector.extract.")) {
837  rename(F);
838  Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
839  NewFn = Intrinsic::getDeclaration(F->getParent(),
840  Intrinsic::vector_extract, Tys);
841  return true;
842  }
843 
844  if (Name.startswith("experimental.vector.insert.")) {
845  rename(F);
846  auto Args = F->getFunctionType()->params();
847  Type *Tys[] = {Args[0], Args[1]};
848  NewFn = Intrinsic::getDeclaration(F->getParent(),
849  Intrinsic::vector_insert, Tys);
850  return true;
851  }
852 
853  SmallVector<StringRef, 2> Groups;
854  static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
855  if (R.match(Name, &Groups)) {
856  Intrinsic::ID ID =
857  StringSwitch<Intrinsic::ID>(Groups[1])
858  .Case("add", Intrinsic::vector_reduce_add)
859  .Case("mul", Intrinsic::vector_reduce_mul)
860  .Case("and", Intrinsic::vector_reduce_and)
861  .Case("or", Intrinsic::vector_reduce_or)
862  .Case("xor", Intrinsic::vector_reduce_xor)
863  .Case("smax", Intrinsic::vector_reduce_smax)
864  .Case("smin", Intrinsic::vector_reduce_smin)
865  .Case("umax", Intrinsic::vector_reduce_umax)
866  .Case("umin", Intrinsic::vector_reduce_umin)
867  .Case("fmax", Intrinsic::vector_reduce_fmax)
868  .Case("fmin", Intrinsic::vector_reduce_fmin)
869  .Default(Intrinsic::not_intrinsic);
870  if (ID != Intrinsic::not_intrinsic) {
871  rename(F);
872  auto Args = F->getFunctionType()->params();
873  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
874  return true;
875  }
876  }
877  static const Regex R2(
878  "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
879  Groups.clear();
880  if (R2.match(Name, &Groups)) {
881  Intrinsic::ID ID = Intrinsic::not_intrinsic;
882  if (Groups[1] == "fadd")
883  ID = Intrinsic::vector_reduce_fadd;
884  if (Groups[1] == "fmul")
885  ID = Intrinsic::vector_reduce_fmul;
886  if (ID != Intrinsic::not_intrinsic) {
887  rename(F);
888  auto Args = F->getFunctionType()->params();
889  Type *Tys[] = {Args[1]};
890  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
891  return true;
892  }
893  }
894  break;
895  }
896  case 'i':
897  case 'l': {
898  bool IsLifetimeStart = Name.startswith("lifetime.start");
899  if (IsLifetimeStart || Name.startswith("invariant.start")) {
900  Intrinsic::ID ID = IsLifetimeStart ?
901  Intrinsic::lifetime_start : Intrinsic::invariant_start;
902  auto Args = F->getFunctionType()->params();
903  Type* ObjectPtr[1] = {Args[1]};
904  if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
905  rename(F);
906  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
907  return true;
908  }
909  }
910 
911  bool IsLifetimeEnd = Name.startswith("lifetime.end");
912  if (IsLifetimeEnd || Name.startswith("invariant.end")) {
913  Intrinsic::ID ID = IsLifetimeEnd ?
914  Intrinsic::lifetime_end : Intrinsic::invariant_end;
915 
916  auto Args = F->getFunctionType()->params();
917  Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
918  if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
919  rename(F);
920  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
921  return true;
922  }
923  }
924  if (Name.startswith("invariant.group.barrier")) {
925  // Rename invariant.group.barrier to launder.invariant.group
926  auto Args = F->getFunctionType()->params();
927  Type* ObjectPtr[1] = {Args[0]};
928  rename(F);
929  NewFn = Intrinsic::getDeclaration(F->getParent(),
930  Intrinsic::launder_invariant_group, ObjectPtr);
931  return true;
932 
933  }
934 
935  break;
936  }
937  case 'm': {
938  if (Name.startswith("masked.load.")) {
939  Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
940  if (F->getName() !=
941  Intrinsic::getName(Intrinsic::masked_load, Tys, F->getParent())) {
942  rename(F);
943  NewFn = Intrinsic::getDeclaration(F->getParent(),
944  Intrinsic::masked_load,
945  Tys);
946  return true;
947  }
948  }
949  if (Name.startswith("masked.store.")) {
950  auto Args = F->getFunctionType()->params();
951  Type *Tys[] = { Args[0], Args[1] };
952  if (F->getName() !=
953  Intrinsic::getName(Intrinsic::masked_store, Tys, F->getParent())) {
954  rename(F);
955  NewFn = Intrinsic::getDeclaration(F->getParent(),
956  Intrinsic::masked_store,
957  Tys);
958  return true;
959  }
960  }
961  // Renaming gather/scatter intrinsics with no address space overloading
962  // to the new overload which includes an address space
963  if (Name.startswith("masked.gather.")) {
964  Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
965  if (F->getName() !=
966  Intrinsic::getName(Intrinsic::masked_gather, Tys, F->getParent())) {
967  rename(F);
968  NewFn = Intrinsic::getDeclaration(F->getParent(),
969  Intrinsic::masked_gather, Tys);
970  return true;
971  }
972  }
973  if (Name.startswith("masked.scatter.")) {
974  auto Args = F->getFunctionType()->params();
975  Type *Tys[] = {Args[0], Args[1]};
976  if (F->getName() !=
977  Intrinsic::getName(Intrinsic::masked_scatter, Tys, F->getParent())) {
978  rename(F);
979  NewFn = Intrinsic::getDeclaration(F->getParent(),
980  Intrinsic::masked_scatter, Tys);
981  return true;
982  }
983  }
984  // Updating the memory intrinsics (memcpy/memmove/memset) that have an
985  // alignment parameter to embedding the alignment as an attribute of
986  // the pointer args.
987  if (Name.startswith("memcpy.") && F->arg_size() == 5) {
988  rename(F);
989  // Get the types of dest, src, and len
990  ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
991  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
992  ParamTypes);
993  return true;
994  }
995  if (Name.startswith("memmove.") && F->arg_size() == 5) {
996  rename(F);
997  // Get the types of dest, src, and len
998  ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
999  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
1000  ParamTypes);
1001  return true;
1002  }
1003  if (Name.startswith("memset.") && F->arg_size() == 5) {
1004  rename(F);
1005  // Get the types of dest, and len
1006  const auto *FT = F->getFunctionType();
1007  Type *ParamTypes[2] = {
1008  FT->getParamType(0), // Dest
1009  FT->getParamType(2) // len
1010  };
1011  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
1012  ParamTypes);
1013  return true;
1014  }
1015  break;
1016  }
1017  case 'n': {
1018  if (Name.startswith("nvvm.")) {
1019  Name = Name.substr(5);
1020 
1021  // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
1022  Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
1023  .Cases("brev32", "brev64", Intrinsic::bitreverse)
1024  .Case("clz.i", Intrinsic::ctlz)
1025  .Case("popc.i", Intrinsic::ctpop)
1026  .Default(Intrinsic::not_intrinsic);
1027  if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
1028  NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
1029  {F->getReturnType()});
1030  return true;
1031  }
1032 
1033  // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1034  // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1035  //
1036  // TODO: We could add lohi.i2d.
1037  bool Expand = StringSwitch<bool>(Name)
1038  .Cases("abs.i", "abs.ll", true)
1039  .Cases("clz.ll", "popc.ll", "h2f", true)
1040  .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
1041  .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
1042  .StartsWith("atomic.load.add.f32.p", true)
1043  .StartsWith("atomic.load.add.f64.p", true)
1044  .Default(false);
1045  if (Expand) {
1046  NewFn = nullptr;
1047  return true;
1048  }
1049  }
1050  break;
1051  }
1052  case 'o':
1053  // We only need to change the name to match the mangling including the
1054  // address space.
1055  if (Name.startswith("objectsize.")) {
1056  Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1057  if (F->arg_size() == 2 || F->arg_size() == 3 ||
1058  F->getName() !=
1059  Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
1060  rename(F);
1061  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
1062  Tys);
1063  return true;
1064  }
1065  }
1066  break;
1067 
1068  case 'p':
1069  if (Name == "prefetch") {
1070  // Handle address space overloading.
1071  Type *Tys[] = {F->arg_begin()->getType()};
1072  if (F->getName() !=
1073  Intrinsic::getName(Intrinsic::prefetch, Tys, F->getParent())) {
1074  rename(F);
1075  NewFn =
1076  Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
1077  return true;
1078  }
1079  } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
1080  rename(F);
1081  NewFn = Intrinsic::getDeclaration(F->getParent(),
1082  Intrinsic::ptr_annotation,
1083  F->arg_begin()->getType());
1084  return true;
1085  }
1086  break;
1087 
1088  case 's':
1089  if (Name == "stackprotectorcheck") {
1090  NewFn = nullptr;
1091  return true;
1092  }
1093  break;
1094 
1095  case 'v': {
1096  if (Name == "var.annotation" && F->arg_size() == 4) {
1097  rename(F);
1098  NewFn = Intrinsic::getDeclaration(F->getParent(),
1099  Intrinsic::var_annotation);
1100  return true;
1101  }
1102  break;
1103  }
1104 
1105  case 'x':
1106  if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
1107  return true;
1108  }
1109 
1110  auto *ST = dyn_cast<StructType>(F->getReturnType());
1111  if (ST && (!ST->isLiteral() || ST->isPacked())) {
1112  // Replace return type with literal non-packed struct. Only do this for
1113  // intrinsics declared to return a struct, not for intrinsics with
1114  // overloaded return type, in which case the exact struct type will be
1115  // mangled into the name.
1116  SmallVector<Intrinsic::IITDescriptor> Desc;
1117  Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1118  if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1119  auto *FT = F->getFunctionType();
1120  auto *NewST = StructType::get(ST->getContext(), ST->elements());
1121  auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1122  std::string Name = F->getName().str();
1123  rename(F);
1124  NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1125  Name, F->getParent());
1126 
1127  // The new function may also need remangling.
1128  if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1129  NewFn = *Result;
1130  return true;
1131  }
1132  }
1133 
1134  // Remangle our intrinsic since we upgrade the mangling
1135  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1136  if (Result != None) {
1137  NewFn = *Result;
1138  return true;
1139  }
1140 
1141  // This may not belong here. This function is effectively being overloaded
1142  // to both detect an intrinsic which needs upgrading, and to provide the
1143  // upgraded form of the intrinsic. We should perhaps have two separate
1144  // functions for this.
1145  return false;
1146 }
1147 
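// Public entry point: upgrade the intrinsic declaration if needed and refresh
// the intrinsic's attributes on the (possibly replaced) function.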
1148 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
1149  NewFn = nullptr;
1150  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
1151  assert(F != NewFn && "Intrinsic function upgraded to the same function");
1152 
1153  // Upgrade intrinsic attributes. This does not change the function.
1154  if (NewFn)
1155  F = NewFn;
1156  if (Intrinsic::ID id = F->getIntrinsicID())
1157  F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1158  return Upgraded;
1159 }
1160 
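// Upgrade llvm.global_ctors / llvm.global_dtors from the old two-field
// {priority, function} element type to the current three-field form, filling
// the new associated-data field with a null pointer.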
1161 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1162  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1163  GV->getName() == "llvm.global_dtors")) ||
1164  !GV->hasInitializer())
1165  return nullptr;
1166  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1167  if (!ATy)
1168  return nullptr;
1169  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1170  if (!STy || STy->getNumElements() != 2)
1171  return nullptr;
1172 
1173  LLVMContext &C = GV->getContext();
1174  IRBuilder<> IRB(C);
1175  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1176  IRB.getInt8PtrTy());
1177  Constant *Init = GV->getInitializer();
1178  unsigned N = Init->getNumOperands();
1179  std::vector<Constant *> NewCtors(N);
1180  for (unsigned i = 0; i != N; ++i) {
1181  auto Ctor = cast<Constant>(Init->getOperand(i));
1182  NewCtors[i] = ConstantStruct::get(
1183  EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
1184  Constant::getNullValue(IRB.getInt8PtrTy()));
1185  }
1186  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1187 
1188  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1189  NewInit, GV->getName());
1190 }
1191 
1192 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1193 // to byte shuffles.
1194 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
1195  Value *Op, unsigned Shift) {
1196  auto *ResultTy = cast<FixedVectorType>(Op->getType());
1197  unsigned NumElts = ResultTy->getNumElements() * 8;
1198 
1199  // Bitcast from a 64-bit element type to a byte element type.
1200  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1201  Op = Builder.CreateBitCast(Op, VecTy, "cast");
1202 
1203  // We'll be shuffling in zeroes.
1204  Value *Res = Constant::getNullValue(VecTy);
1205 
1206  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1207  // we'll just return the zero vector.
1208  if (Shift < 16) {
1209  int Idxs[64];
1210  // 256/512-bit version is split into 2/4 16-byte lanes.
1211  for (unsigned l = 0; l != NumElts; l += 16)
1212  for (unsigned i = 0; i != 16; ++i) {
1213  unsigned Idx = NumElts + i - Shift;
1214  if (Idx < NumElts)
1215  Idx -= NumElts - 16; // end of lane, switch operand.
1216  Idxs[l + i] = Idx + l;
1217  }
1218 
1219  Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
1220  }
1221 
1222  // Bitcast back to a 64-bit element type.
1223  return Builder.CreateBitCast(Res, ResultTy, "cast");
1224 }
1225 
1226 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1227 // to byte shuffles.
1228 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1229  unsigned Shift) {
1230  auto *ResultTy = cast<FixedVectorType>(Op->getType());
1231  unsigned NumElts = ResultTy->getNumElements() * 8;
1232 
1233  // Bitcast from a 64-bit element type to a byte element type.
1234  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1235  Op = Builder.CreateBitCast(Op, VecTy, "cast");
1236 
1237  // We'll be shuffling in zeroes.
1238  Value *Res = Constant::getNullValue(VecTy);
1239 
1240  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1241  // we'll just return the zero vector.
1242  if (Shift < 16) {
1243  int Idxs[64];
1244  // 256/512-bit version is split into 2/4 16-byte lanes.
1245  for (unsigned l = 0; l != NumElts; l += 16)
1246  for (unsigned i = 0; i != 16; ++i) {
1247  unsigned Idx = i + Shift;
1248  if (Idx >= 16)
1249  Idx += NumElts - 16; // end of lane, switch operand.
1250  Idxs[l + i] = Idx + l;
1251  }
1252 
1253  Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
1254  }
1255 
1256  // Bitcast back to a 64-bit element type.
1257  return Builder.CreateBitCast(Res, ResultTy, "cast");
1258 }
1259 
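// Convert an integer mask argument into a vector of i1 elements, extracting
// just the low NumElts bits when fewer than 8 elements are needed.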
1260 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1261  unsigned NumElts) {
1262  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1263  llvm::VectorType *MaskTy = FixedVectorType::get(
1264  Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1265  Mask = Builder.CreateBitCast(Mask, MaskTy);
1266 
1267  // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1268  // i8 and we need to extract down to the right number of elements.
1269  if (NumElts <= 4) {
1270  int Indices[4];
1271  for (unsigned i = 0; i != NumElts; ++i)
1272  Indices[i] = i;
1273  Mask = Builder.CreateShuffleVector(
1274  Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
1275  }
1276 
1277  return Mask;
1278 }
1279 
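// Apply an integer mask as a vector select between Op0 and Op1; an all-ones
// constant mask folds directly to Op0.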
1280 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
1281  Value *Op0, Value *Op1) {
1282  // If the mask is all ones just emit the first operation.
1283  if (const auto *C = dyn_cast<Constant>(Mask))
1284  if (C->isAllOnesValue())
1285  return Op0;
1286 
1287  Mask = getX86MaskVec(Builder, Mask,
1288  cast<FixedVectorType>(Op0->getType())->getNumElements());
1289  return Builder.CreateSelect(Mask, Op0, Op1);
1290 }
1291 
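// Same idea for scalar masked intrinsics: only bit 0 of the mask selects
// between Op0 and Op1.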
1292 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
1293  Value *Op0, Value *Op1) {
1294  // If the mask is all ones just emit the first operation.
1295  if (const auto *C = dyn_cast<Constant>(Mask))
1296  if (C->isAllOnesValue())
1297  return Op0;
1298 
1299  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1300  Mask->getType()->getIntegerBitWidth());
1301  Mask = Builder.CreateBitCast(Mask, MaskTy);
1302  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1303  return Builder.CreateSelect(Mask, Op0, Op1);
1304 }
1305 
1306 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1307 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1308 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1309 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1310  Value *Op1, Value *Shift,
1311  Value *Passthru, Value *Mask,
1312  bool IsVALIGN) {
1313  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1314 
1315  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1316  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1317  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1318  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1319 
1320  // Mask the immediate for VALIGN.
1321  if (IsVALIGN)
1322  ShiftVal &= (NumElts - 1);
1323 
1324  // If palignr is shifting the pair of vectors more than the size of two
1325  // lanes, emit zero.
1326  if (ShiftVal >= 32)
1327  return llvm::Constant::getNullValue(Op0->getType());
1328 
1329  // If palignr is shifting the pair of input vectors more than one lane,
1330  // but less than two lanes, convert to shifting in zeroes.
1331  if (ShiftVal > 16) {
1332  ShiftVal -= 16;
1333  Op1 = Op0;
1334  Op0 = llvm::Constant::getNullValue(Op0->getType());
1335  }
1336 
1337  int Indices[64];
1338  // 256-bit palignr operates on 128-bit lanes so we need to handle that
1339  for (unsigned l = 0; l < NumElts; l += 16) {
1340  for (unsigned i = 0; i != 16; ++i) {
1341  unsigned Idx = ShiftVal + i;
1342  if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1343  Idx += NumElts - 16; // End of lane, switch operand.
1344  Indices[l + i] = Idx + l;
1345  }
1346  }
1347 
1348  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
1349  makeArrayRef(Indices, NumElts),
1350  "palignr");
1351 
1352  return EmitX86Select(Builder, Mask, Align, Passthru);
1353 }
1354 
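// Upgrade masked VPERMT2/VPERMI2 variable permute intrinsics to the unmasked
// vpermi2var intrinsics, selecting the pass-through value with the mask.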
1355 static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1356  bool ZeroMask, bool IndexForm) {
1357  Type *Ty = CI.getType();
1358  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1359  unsigned EltWidth = Ty->getScalarSizeInBits();
1360  bool IsFloat = Ty->isFPOrFPVectorTy();
1361  Intrinsic::ID IID;
1362  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1363  IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1364  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1365  IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1366  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1367  IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1368  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1369  IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1370  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1371  IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1372  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1373  IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1374  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1375  IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1376  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1377  IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1378  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1379  IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1380  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1381  IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1382  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1383  IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1384  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1385  IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1386  else if (VecWidth == 128 && EltWidth == 16)
1387  IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1388  else if (VecWidth == 256 && EltWidth == 16)
1389  IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1390  else if (VecWidth == 512 && EltWidth == 16)
1391  IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1392  else if (VecWidth == 128 && EltWidth == 8)
1393  IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1394  else if (VecWidth == 256 && EltWidth == 8)
1395  IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1396  else if (VecWidth == 512 && EltWidth == 8)
1397  IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1398  else
1399  llvm_unreachable("Unexpected intrinsic");
1400 
1401  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1402  CI.getArgOperand(2) };
1403 
1404  // If this isn't index form we need to swap operands 0 and 1.
1405  if (!IndexForm)
1406  std::swap(Args[0], Args[1]);
1407 
1408  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1409  Args);
1410  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1411  : Builder.CreateBitCast(CI.getArgOperand(1),
1412  Ty);
1413  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1414 }
1415 
1416 static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1417  Intrinsic::ID IID) {
1418  Type *Ty = CI.getType();
1419  Value *Op0 = CI.getOperand(0);
1420  Value *Op1 = CI.getOperand(1);
1421  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1422  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1423 
1424  if (CI.arg_size() == 4) { // For masked intrinsics.
1425  Value *VecSrc = CI.getOperand(2);
1426  Value *Mask = CI.getOperand(3);
1427  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1428  }
1429  return Res;
1430 }
1431 
1432 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1433  bool IsRotateRight) {
1434  Type *Ty = CI.getType();
1435  Value *Src = CI.getArgOperand(0);
1436  Value *Amt = CI.getArgOperand(1);
1437 
1438  // The amount may be a scalar immediate, in which case create a splat vector.
1439  // Funnel shift amounts are treated modulo the element width, and the types are
1440  // all powers of 2, so we only care about the lowest log2 bits anyway.
1441  if (Amt->getType() != Ty) {
1442  unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1443  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1444  Amt = Builder.CreateVectorSplat(NumElts, Amt);
1445  }
1446 
1447  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1448  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1449  Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1450 
1451  if (CI.arg_size() == 4) { // For masked intrinsics.
1452  Value *VecSrc = CI.getOperand(2);
1453  Value *Mask = CI.getOperand(3);
1454  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1455  }
1456  return Res;
1457 }
1458 
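// Illustrative sketch (assumption): a rotate such as
// llvm.x86.avx512.prol.d.128(<4 x i32> %x, i32 3) is rewritten by the helper
// below into a funnel shift with both value operands equal, roughly
//   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x,
//                                        <4 x i32> <i32 3, i32 3, i32 3, i32 3>)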
1459 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1460  bool IsSigned) {
1461  Type *Ty = CI.getType();
1462  Value *LHS = CI.getArgOperand(0);
1463  Value *RHS = CI.getArgOperand(1);
1464 
1465  CmpInst::Predicate Pred;
1466  switch (Imm) {
1467  case 0x0:
1468  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1469  break;
1470  case 0x1:
1471  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1472  break;
1473  case 0x2:
1474  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1475  break;
1476  case 0x3:
1477  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1478  break;
1479  case 0x4:
1480  Pred = ICmpInst::ICMP_EQ;
1481  break;
1482  case 0x5:
1483  Pred = ICmpInst::ICMP_NE;
1484  break;
1485  case 0x6:
1486  return Constant::getNullValue(Ty); // FALSE
1487  case 0x7:
1488  return Constant::getAllOnesValue(Ty); // TRUE
1489  default:
1490  llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1491  }
1492 
1493  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1494  Value *Ext = Builder.CreateSExt(Cmp, Ty);
1495  return Ext;
1496 }
1497 
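// Illustrative sketch (assumption): the signed XOP compare
// llvm.x86.xop.vpcomltb(<16 x i8> %a, <16 x i8> %b), i.e. predicate 0, becomes
//   %c = icmp slt <16 x i8> %a, %b
//   %r = sext <16 x i1> %c to <16 x i8>
// in the helper below; predicates 6 and 7 fold directly to all-zeros/all-ones.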
1498 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1499  bool IsShiftRight, bool ZeroMask) {
1500  Type *Ty = CI.getType();
1501  Value *Op0 = CI.getArgOperand(0);
1502  Value *Op1 = CI.getArgOperand(1);
1503  Value *Amt = CI.getArgOperand(2);
1504 
1505  if (IsShiftRight)
1506  std::swap(Op0, Op1);
1507 
1508  // The amount may be a scalar immediate, in which case create a splat vector.
1509  // Funnel shift amounts are treated modulo the element width, and the types are
1510  // all powers of 2, so we only care about the lowest log2 bits anyway.
1511  if (Amt->getType() != Ty) {
1512  unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1513  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1514  Amt = Builder.CreateVectorSplat(NumElts, Amt);
1515  }
1516 
1517  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1518  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1519  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1520 
1521  unsigned NumArgs = CI.arg_size();
1522  if (NumArgs >= 4) { // For masked intrinsics.
1523  Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1524  ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1525  CI.getArgOperand(0);
1526  Value *Mask = CI.getOperand(NumArgs - 1);
1527  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1528  }
1529  return Res;
1530 }
1531 
1532 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1533  Value *Ptr, Value *Data, Value *Mask,
1534  bool Aligned) {
1535  // Cast the pointer to the right type.
1536  Ptr = Builder.CreateBitCast(Ptr,
1537  llvm::PointerType::getUnqual(Data->getType()));
1538  const Align Alignment =
1539  Aligned
1540  ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
1541  : Align(1);
1542 
1543  // If the mask is all ones just emit a regular store.
1544  if (const auto *C = dyn_cast<Constant>(Mask))
1545  if (C->isAllOnesValue())
1546  return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1547 
1548  // Convert the mask from an integer type to a vector of i1.
1549  unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1550  Mask = getX86MaskVec(Builder, Mask, NumElts);
1551  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1552 }
1553 
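// Illustrative sketch (assumption): a masked 512-bit store with a non-constant
// i16 mask is rewritten by the helper above into roughly
//   %mv = bitcast i16 %mask to <16 x i1>
//   call void @llvm.masked.store.v16f32.p0(<16 x float> %data, ptr %ptr,
//                                          i32 64, <16 x i1> %mv)
// while an all-ones constant mask becomes an ordinary (aligned) store.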
1554 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1555  Value *Ptr, Value *Passthru, Value *Mask,
1556  bool Aligned) {
1557  Type *ValTy = Passthru->getType();
1558  // Cast the pointer to the right type.
1559  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1560  const Align Alignment =
1561  Aligned
1562  ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
1563  8)
1564  : Align(1);
1565 
1566  // If the mask is all ones just emit a regular load.
1567  if (const auto *C = dyn_cast<Constant>(Mask))
1568  if (C->isAllOnesValue())
1569  return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1570 
1571  // Convert the mask from an integer type to a vector of i1.
1572  unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1573  Mask = getX86MaskVec(Builder, Mask, NumElts);
1574  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1575 }
1576 
1577 static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1578  Type *Ty = CI.getType();
1579  Value *Op0 = CI.getArgOperand(0);
1580  Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1581  Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1582  if (CI.arg_size() == 3)
1583  Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1584  return Res;
1585 }
1586 
1587 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1588  Type *Ty = CI.getType();
1589 
1590  // Arguments have a vXi32 type so cast to vXi64.
1591  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1592  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1593 
1594  if (IsSigned) {
1595  // Shift left then arithmetic shift right.
1596  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1597  LHS = Builder.CreateShl(LHS, ShiftAmt);
1598  LHS = Builder.CreateAShr(LHS, ShiftAmt);
1599  RHS = Builder.CreateShl(RHS, ShiftAmt);
1600  RHS = Builder.CreateAShr(RHS, ShiftAmt);
1601  } else {
1602  // Clear the upper bits.
1603  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1604  LHS = Builder.CreateAnd(LHS, Mask);
1605  RHS = Builder.CreateAnd(RHS, Mask);
1606  }
1607 
1608  Value *Res = Builder.CreateMul(LHS, RHS);
1609 
1610  if (CI.arg_size() == 4)
1611  Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1612 
1613  return Res;
1614 }
1615 
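// Illustrative sketch (assumption): llvm.x86.sse41.pmuldq(<4 x i32> %a, <4 x i32> %b)
// is expanded by upgradePMULDQ below into sign-extension of the low half of
// each 64-bit lane followed by a plain multiply, roughly
//   %a64 = bitcast <4 x i32> %a to <2 x i64>
//   %ash = shl <2 x i64> %a64, <i64 32, i64 32>
//   %as  = ashr <2 x i64> %ash, <i64 32, i64 32>
//   (the same for %b), then  %r = mul <2 x i64> %as, %bs
// The unsigned variant masks with 0xffffffff instead of shifting.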
1616 // Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
1617 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1618  Value *Mask) {
1619  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1620  if (Mask) {
1621  const auto *C = dyn_cast<Constant>(Mask);
1622  if (!C || !C->isAllOnesValue())
1623  Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1624  }
1625 
1626  if (NumElts < 8) {
1627  int Indices[8];
1628  for (unsigned i = 0; i != NumElts; ++i)
1629  Indices[i] = i;
1630  for (unsigned i = NumElts; i != 8; ++i)
1631  Indices[i] = NumElts + i % NumElts;
1632  Vec = Builder.CreateShuffleVector(Vec,
1633  Constant::getNullValue(Vec->getType()),
1634  Indices);
1635  }
1636  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1637 }
1638 
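// Illustrative sketch (assumption): with only four compare bits, the helper
// above pads the result out to eight lanes with zeros before the final bitcast:
//   %pad = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer,
//          <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
//   %r   = bitcast <8 x i1> %pad to i8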
1639 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1640  unsigned CC, bool Signed) {
1641  Value *Op0 = CI.getArgOperand(0);
1642  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1643 
1644  Value *Cmp;
1645  if (CC == 3) {
1646  Cmp = Constant::getNullValue(
1647  FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1648  } else if (CC == 7) {
1649  Cmp = Constant::getAllOnesValue(
1650  FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1651  } else {
1652  ICmpInst::Predicate Pred;
1653  switch (CC) {
1654  default: llvm_unreachable("Unknown condition code");
1655  case 0: Pred = ICmpInst::ICMP_EQ; break;
1656  case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1657  case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1658  case 4: Pred = ICmpInst::ICMP_NE; break;
1659  case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1660  case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1661  }
1662  Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1663  }
1664 
1665  Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1666 
1667  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1668 }
1669 
1670 // Replace a masked intrinsic with an older unmasked intrinsic.
1671 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
1672  Intrinsic::ID IID) {
1673  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1674  Value *Rep = Builder.CreateCall(Intrin,
1675  { CI.getArgOperand(0), CI.getArgOperand(1) });
1676  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1677 }
1678 
1679 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
1680  Value* A = CI.getArgOperand(0);
1681  Value* B = CI.getArgOperand(1);
1682  Value* Src = CI.getArgOperand(2);
1683  Value* Mask = CI.getArgOperand(3);
1684 
1685  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1686  Value* Cmp = Builder.CreateIsNotNull(AndNode);
1687  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1688  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1689  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1690  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1691 }
1692 
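// Illustrative sketch (assumption): upgradeMaskedMove above turns a masked
// scalar move into roughly
//   %bit  = and i8 %mask, 1
//   %cond = icmp ne i8 %bit, 0
//   %lo   = select i1 %cond, float %b0, float %src0
//   %res  = insertelement <4 x float> %a, float %lo, i64 0
// where %b0 and %src0 are element 0 of the second and third operands.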
1693 
1694 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
1695  Value* Op = CI.getArgOperand(0);
1696  Type* ReturnOp = CI.getType();
1697  unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1698  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1699  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1700 }
1701 
1702 // Replace intrinsic with unmasked version and a select.
1703 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1704  CallBase &CI, Value *&Rep) {
1705  Name = Name.substr(12); // Remove avx512.mask.
1706 
1707  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1708  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1709  Intrinsic::ID IID;
1710  if (Name.startswith("max.p")) {
1711  if (VecWidth == 128 && EltWidth == 32)
1712  IID = Intrinsic::x86_sse_max_ps;
1713  else if (VecWidth == 128 && EltWidth == 64)
1714  IID = Intrinsic::x86_sse2_max_pd;
1715  else if (VecWidth == 256 && EltWidth == 32)
1716  IID = Intrinsic::x86_avx_max_ps_256;
1717  else if (VecWidth == 256 && EltWidth == 64)
1718  IID = Intrinsic::x86_avx_max_pd_256;
1719  else
1720  llvm_unreachable("Unexpected intrinsic");
1721  } else if (Name.startswith("min.p")) {
1722  if (VecWidth == 128 && EltWidth == 32)
1723  IID = Intrinsic::x86_sse_min_ps;
1724  else if (VecWidth == 128 && EltWidth == 64)
1725  IID = Intrinsic::x86_sse2_min_pd;
1726  else if (VecWidth == 256 && EltWidth == 32)
1727  IID = Intrinsic::x86_avx_min_ps_256;
1728  else if (VecWidth == 256 && EltWidth == 64)
1729  IID = Intrinsic::x86_avx_min_pd_256;
1730  else
1731  llvm_unreachable("Unexpected intrinsic");
1732  } else if (Name.startswith("pshuf.b.")) {
1733  if (VecWidth == 128)
1734  IID = Intrinsic::x86_ssse3_pshuf_b_128;
1735  else if (VecWidth == 256)
1736  IID = Intrinsic::x86_avx2_pshuf_b;
1737  else if (VecWidth == 512)
1738  IID = Intrinsic::x86_avx512_pshuf_b_512;
1739  else
1740  llvm_unreachable("Unexpected intrinsic");
1741  } else if (Name.startswith("pmul.hr.sw.")) {
1742  if (VecWidth == 128)
1743  IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1744  else if (VecWidth == 256)
1745  IID = Intrinsic::x86_avx2_pmul_hr_sw;
1746  else if (VecWidth == 512)
1747  IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1748  else
1749  llvm_unreachable("Unexpected intrinsic");
1750  } else if (Name.startswith("pmulh.w.")) {
1751  if (VecWidth == 128)
1752  IID = Intrinsic::x86_sse2_pmulh_w;
1753  else if (VecWidth == 256)
1754  IID = Intrinsic::x86_avx2_pmulh_w;
1755  else if (VecWidth == 512)
1756  IID = Intrinsic::x86_avx512_pmulh_w_512;
1757  else
1758  llvm_unreachable("Unexpected intrinsic");
1759  } else if (Name.startswith("pmulhu.w.")) {
1760  if (VecWidth == 128)
1761  IID = Intrinsic::x86_sse2_pmulhu_w;
1762  else if (VecWidth == 256)
1763  IID = Intrinsic::x86_avx2_pmulhu_w;
1764  else if (VecWidth == 512)
1765  IID = Intrinsic::x86_avx512_pmulhu_w_512;
1766  else
1767  llvm_unreachable("Unexpected intrinsic");
1768  } else if (Name.startswith("pmaddw.d.")) {
1769  if (VecWidth == 128)
1770  IID = Intrinsic::x86_sse2_pmadd_wd;
1771  else if (VecWidth == 256)
1772  IID = Intrinsic::x86_avx2_pmadd_wd;
1773  else if (VecWidth == 512)
1774  IID = Intrinsic::x86_avx512_pmaddw_d_512;
1775  else
1776  llvm_unreachable("Unexpected intrinsic");
1777  } else if (Name.startswith("pmaddubs.w.")) {
1778  if (VecWidth == 128)
1779  IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1780  else if (VecWidth == 256)
1781  IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1782  else if (VecWidth == 512)
1783  IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1784  else
1785  llvm_unreachable("Unexpected intrinsic");
1786  } else if (Name.startswith("packsswb.")) {
1787  if (VecWidth == 128)
1788  IID = Intrinsic::x86_sse2_packsswb_128;
1789  else if (VecWidth == 256)
1790  IID = Intrinsic::x86_avx2_packsswb;
1791  else if (VecWidth == 512)
1792  IID = Intrinsic::x86_avx512_packsswb_512;
1793  else
1794  llvm_unreachable("Unexpected intrinsic");
1795  } else if (Name.startswith("packssdw.")) {
1796  if (VecWidth == 128)
1797  IID = Intrinsic::x86_sse2_packssdw_128;
1798  else if (VecWidth == 256)
1799  IID = Intrinsic::x86_avx2_packssdw;
1800  else if (VecWidth == 512)
1801  IID = Intrinsic::x86_avx512_packssdw_512;
1802  else
1803  llvm_unreachable("Unexpected intrinsic");
1804  } else if (Name.startswith("packuswb.")) {
1805  if (VecWidth == 128)
1806  IID = Intrinsic::x86_sse2_packuswb_128;
1807  else if (VecWidth == 256)
1808  IID = Intrinsic::x86_avx2_packuswb;
1809  else if (VecWidth == 512)
1810  IID = Intrinsic::x86_avx512_packuswb_512;
1811  else
1812  llvm_unreachable("Unexpected intrinsic");
1813  } else if (Name.startswith("packusdw.")) {
1814  if (VecWidth == 128)
1815  IID = Intrinsic::x86_sse41_packusdw;
1816  else if (VecWidth == 256)
1817  IID = Intrinsic::x86_avx2_packusdw;
1818  else if (VecWidth == 512)
1819  IID = Intrinsic::x86_avx512_packusdw_512;
1820  else
1821  llvm_unreachable("Unexpected intrinsic");
1822  } else if (Name.startswith("vpermilvar.")) {
1823  if (VecWidth == 128 && EltWidth == 32)
1824  IID = Intrinsic::x86_avx_vpermilvar_ps;
1825  else if (VecWidth == 128 && EltWidth == 64)
1826  IID = Intrinsic::x86_avx_vpermilvar_pd;
1827  else if (VecWidth == 256 && EltWidth == 32)
1828  IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1829  else if (VecWidth == 256 && EltWidth == 64)
1830  IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1831  else if (VecWidth == 512 && EltWidth == 32)
1832  IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1833  else if (VecWidth == 512 && EltWidth == 64)
1834  IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1835  else
1836  llvm_unreachable("Unexpected intrinsic");
1837  } else if (Name == "cvtpd2dq.256") {
1838  IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1839  } else if (Name == "cvtpd2ps.256") {
1840  IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1841  } else if (Name == "cvttpd2dq.256") {
1842  IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1843  } else if (Name == "cvttps2dq.128") {
1844  IID = Intrinsic::x86_sse2_cvttps2dq;
1845  } else if (Name == "cvttps2dq.256") {
1846  IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1847  } else if (Name.startswith("permvar.")) {
1848  bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1849  if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1850  IID = Intrinsic::x86_avx2_permps;
1851  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1852  IID = Intrinsic::x86_avx2_permd;
1853  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1854  IID = Intrinsic::x86_avx512_permvar_df_256;
1855  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1856  IID = Intrinsic::x86_avx512_permvar_di_256;
1857  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1858  IID = Intrinsic::x86_avx512_permvar_sf_512;
1859  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1860  IID = Intrinsic::x86_avx512_permvar_si_512;
1861  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1862  IID = Intrinsic::x86_avx512_permvar_df_512;
1863  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1864  IID = Intrinsic::x86_avx512_permvar_di_512;
1865  else if (VecWidth == 128 && EltWidth == 16)
1866  IID = Intrinsic::x86_avx512_permvar_hi_128;
1867  else if (VecWidth == 256 && EltWidth == 16)
1868  IID = Intrinsic::x86_avx512_permvar_hi_256;
1869  else if (VecWidth == 512 && EltWidth == 16)
1870  IID = Intrinsic::x86_avx512_permvar_hi_512;
1871  else if (VecWidth == 128 && EltWidth == 8)
1872  IID = Intrinsic::x86_avx512_permvar_qi_128;
1873  else if (VecWidth == 256 && EltWidth == 8)
1874  IID = Intrinsic::x86_avx512_permvar_qi_256;
1875  else if (VecWidth == 512 && EltWidth == 8)
1876  IID = Intrinsic::x86_avx512_permvar_qi_512;
1877  else
1878  llvm_unreachable("Unexpected intrinsic");
1879  } else if (Name.startswith("dbpsadbw.")) {
1880  if (VecWidth == 128)
1881  IID = Intrinsic::x86_avx512_dbpsadbw_128;
1882  else if (VecWidth == 256)
1883  IID = Intrinsic::x86_avx512_dbpsadbw_256;
1884  else if (VecWidth == 512)
1885  IID = Intrinsic::x86_avx512_dbpsadbw_512;
1886  else
1887  llvm_unreachable("Unexpected intrinsic");
1888  } else if (Name.startswith("pmultishift.qb.")) {
1889  if (VecWidth == 128)
1890  IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1891  else if (VecWidth == 256)
1892  IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1893  else if (VecWidth == 512)
1894  IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1895  else
1896  llvm_unreachable("Unexpected intrinsic");
1897  } else if (Name.startswith("conflict.")) {
1898  if (Name[9] == 'd' && VecWidth == 128)
1899  IID = Intrinsic::x86_avx512_conflict_d_128;
1900  else if (Name[9] == 'd' && VecWidth == 256)
1901  IID = Intrinsic::x86_avx512_conflict_d_256;
1902  else if (Name[9] == 'd' && VecWidth == 512)
1903  IID = Intrinsic::x86_avx512_conflict_d_512;
1904  else if (Name[9] == 'q' && VecWidth == 128)
1905  IID = Intrinsic::x86_avx512_conflict_q_128;
1906  else if (Name[9] == 'q' && VecWidth == 256)
1907  IID = Intrinsic::x86_avx512_conflict_q_256;
1908  else if (Name[9] == 'q' && VecWidth == 512)
1909  IID = Intrinsic::x86_avx512_conflict_q_512;
1910  else
1911  llvm_unreachable("Unexpected intrinsic");
1912  } else if (Name.startswith("pavg.")) {
1913  if (Name[5] == 'b' && VecWidth == 128)
1914  IID = Intrinsic::x86_sse2_pavg_b;
1915  else if (Name[5] == 'b' && VecWidth == 256)
1916  IID = Intrinsic::x86_avx2_pavg_b;
1917  else if (Name[5] == 'b' && VecWidth == 512)
1918  IID = Intrinsic::x86_avx512_pavg_b_512;
1919  else if (Name[5] == 'w' && VecWidth == 128)
1920  IID = Intrinsic::x86_sse2_pavg_w;
1921  else if (Name[5] == 'w' && VecWidth == 256)
1922  IID = Intrinsic::x86_avx2_pavg_w;
1923  else if (Name[5] == 'w' && VecWidth == 512)
1924  IID = Intrinsic::x86_avx512_pavg_w_512;
1925  else
1926  llvm_unreachable("Unexpected intrinsic");
1927  } else
1928  return false;
1929 
1930  SmallVector<Value *, 4> Args(CI.args());
1931  Args.pop_back();
1932  Args.pop_back();
1933  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1934  Args);
1935  unsigned NumArgs = CI.arg_size();
1936  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1937  CI.getArgOperand(NumArgs - 2));
1938  return true;
1939 }
1940 
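// Illustrative sketch (assumption): upgradeAVX512MaskToSelect above rewrites,
// for example, llvm.x86.avx512.mask.max.ps.128(%a, %b, %passthru, i8 %m) into
//   %v = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a, <4 x float> %b)
//   %r = select <4 x i1> %m.lo4, <4 x float> %v, <4 x float> %passthru
// where %m.lo4 is the low four bits of %m converted to a <4 x i1> vector.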
1941 /// Upgrade the comment in a call to inline asm that represents an ObjC
1942 /// retain/release marker.
1943 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1944  size_t Pos;
1945  if (AsmStr->find("mov\tfp") == 0 &&
1946  AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1947  (Pos = AsmStr->find("# marker")) != std::string::npos) {
1948  AsmStr->replace(Pos, 1, ";");
1949  }
1950 }
1951 
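// Illustrative sketch (assumption about the exact marker string): an old
// retain-autorelease marker such as
//   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
// has its '#' replaced so the marker survives as
//   "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue"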
1952 static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
1953  IRBuilder<> &Builder) {
1954  if (Name == "mve.vctp64.old") {
1955  // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
1956  // correct type.
1957  Value *VCTP = Builder.CreateCall(
1958  Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
1959  CI->getArgOperand(0), CI->getName());
1960  Value *C1 = Builder.CreateCall(
1961  Intrinsic::getDeclaration(
1962  F->getParent(), Intrinsic::arm_mve_pred_v2i,
1963  {VectorType::get(Builder.getInt1Ty(), 2, false)}),
1964  VCTP);
1965  return Builder.CreateCall(
1966  Intrinsic::getDeclaration(
1967  F->getParent(), Intrinsic::arm_mve_pred_i2v,
1968  {VectorType::get(Builder.getInt1Ty(), 4, false)}),
1969  C1);
1970  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
1971  Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
1972  Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
1973  Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
1974  Name == "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
1975  Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
1976  Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
1977  Name == "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
1978  Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
1979  Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
1980  Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
1981  Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
1982  Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
1983  Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
1984  std::vector<Type *> Tys;
1985  unsigned ID = CI->getIntrinsicID();
1986  Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
1987  switch (ID) {
1988  case Intrinsic::arm_mve_mull_int_predicated:
1989  case Intrinsic::arm_mve_vqdmull_predicated:
1990  case Intrinsic::arm_mve_vldr_gather_base_predicated:
1991  Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
1992  break;
1993  case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
1994  case Intrinsic::arm_mve_vstr_scatter_base_predicated:
1995  case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
1996  Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
1997  V2I1Ty};
1998  break;
1999  case Intrinsic::arm_mve_vldr_gather_offset_predicated:
2000  Tys = {CI->getType(), CI->getOperand(0)->getType(),
2001  CI->getOperand(1)->getType(), V2I1Ty};
2002  break;
2003  case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
2004  Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
2005  CI->getOperand(2)->getType(), V2I1Ty};
2006  break;
2007  case Intrinsic::arm_cde_vcx1q_predicated:
2008  case Intrinsic::arm_cde_vcx1qa_predicated:
2009  case Intrinsic::arm_cde_vcx2q_predicated:
2010  case Intrinsic::arm_cde_vcx2qa_predicated:
2011  case Intrinsic::arm_cde_vcx3q_predicated:
2012  case Intrinsic::arm_cde_vcx3qa_predicated:
2013  Tys = {CI->getOperand(1)->getType(), V2I1Ty};
2014  break;
2015  default:
2016  llvm_unreachable("Unhandled Intrinsic!");
2017  }
2018 
2019  std::vector<Value *> Ops;
2020  for (Value *Op : CI->args()) {
2021  Type *Ty = Op->getType();
2022  if (Ty->getScalarSizeInBits() == 1) {
2023  Value *C1 = Builder.CreateCall(
2024  Intrinsic::getDeclaration(
2025  F->getParent(), Intrinsic::arm_mve_pred_v2i,
2026  {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2027  Op);
2028  Op = Builder.CreateCall(
2029  Intrinsic::getDeclaration(F->getParent(),
2030  Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
2031  C1);
2032  }
2033  Ops.push_back(Op);
2034  }
2035 
2036  Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
2037  return Builder.CreateCall(Fn, Ops, CI->getName());
2038  }
2039  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
2040 }
2041 
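// Illustrative sketch (assumption about the overload suffixes): the old
// llvm.arm.mve.vctp64.old(i32 %n), which produced a <4 x i1>, is rebuilt above
// roughly as
//   %p = call <2 x i1> @llvm.arm.mve.vctp64(i32 %n)
//   %i = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> %p)
//   %r = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %i)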
2042 /// Upgrade a call to an old intrinsic. All argument and return casting must be
2043 /// provided to seamlessly integrate with existing context.
2044 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
2045  // Note dyn_cast to Function is not quite the same as getCalledFunction, which
2046  // checks the callee's function type matches. It's likely we need to handle
2047  // type changes here.
2048  Function *F = dyn_cast<Function>(CI->getCalledOperand());
2049  if (!F)
2050  return;
2051 
2052  LLVMContext &C = CI->getContext();
2053  IRBuilder<> Builder(C);
2054  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
2055 
2056  if (!NewFn) {
2057  // Get the Function's name.
2058  StringRef Name = F->getName();
2059 
2060  assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
2061  Name = Name.substr(5);
2062 
2063  bool IsX86 = Name.startswith("x86.");
2064  if (IsX86)
2065  Name = Name.substr(4);
2066  bool IsNVVM = Name.startswith("nvvm.");
2067  if (IsNVVM)
2068  Name = Name.substr(5);
2069  bool IsARM = Name.startswith("arm.");
2070  if (IsARM)
2071  Name = Name.substr(4);
2072 
2073  if (IsX86 && Name.startswith("sse4a.movnt.")) {
2074  Module *M = F->getParent();
2075  SmallVector<Metadata *, 1> Elts;
2076  Elts.push_back(
2077  ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2078  MDNode *Node = MDNode::get(C, Elts);
2079 
2080  Value *Arg0 = CI->getArgOperand(0);
2081  Value *Arg1 = CI->getArgOperand(1);
2082 
2083  // Nontemporal (unaligned) store of the 0'th element of the float/double
2084  // vector.
2085  Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2086  PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2087  Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2088  Value *Extract =
2089  Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2090 
2091  StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2092  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
2093 
2094  // Remove intrinsic.
2095  CI->eraseFromParent();
2096  return;
2097  }
2098 
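  // Illustrative sketch (assumption): llvm.x86.sse4a.movnt.ss(ptr %p, <4 x float> %v)
  // is expanded by the block above into
  //   %e = extractelement <4 x float> %v, i64 0
  //   store float %e, ptr %p, align 1, !nontemporal !0
  // where !0 is metadata holding the i32 constant 1.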
2099  if (IsX86 && (Name.startswith("avx.movnt.") ||
2100  Name.startswith("avx512.storent."))) {
2101  Module *M = F->getParent();
2102  SmallVector<Metadata *, 1> Elts;
2103  Elts.push_back(
2104  ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2105  MDNode *Node = MDNode::get(C, Elts);
2106 
2107  Value *Arg0 = CI->getArgOperand(0);
2108  Value *Arg1 = CI->getArgOperand(1);
2109 
2110  // Convert the type of the pointer to a pointer to the stored type.
2111  Value *BC = Builder.CreateBitCast(Arg0,
2112  PointerType::getUnqual(Arg1->getType()),
2113  "cast");
2114  StoreInst *SI = Builder.CreateAlignedStore(
2115  Arg1, BC,
2116  Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
2117  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
2118 
2119  // Remove intrinsic.
2120  CI->eraseFromParent();
2121  return;
2122  }
2123 
2124  if (IsX86 && Name == "sse2.storel.dq") {
2125  Value *Arg0 = CI->getArgOperand(0);
2126  Value *Arg1 = CI->getArgOperand(1);
2127 
2128  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2129  Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2130  Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2131  Value *BC = Builder.CreateBitCast(Arg0,
2132  PointerType::getUnqual(Elt->getType()),
2133  "cast");
2134  Builder.CreateAlignedStore(Elt, BC, Align(1));
2135 
2136  // Remove intrinsic.
2137  CI->eraseFromParent();
2138  return;
2139  }
2140 
2141  if (IsX86 && (Name.startswith("sse.storeu.") ||
2142  Name.startswith("sse2.storeu.") ||
2143  Name.startswith("avx.storeu."))) {
2144  Value *Arg0 = CI->getArgOperand(0);
2145  Value *Arg1 = CI->getArgOperand(1);
2146 
2147  Arg0 = Builder.CreateBitCast(Arg0,
2148  PointerType::getUnqual(Arg1->getType()),
2149  "cast");
2150  Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2151 
2152  // Remove intrinsic.
2153  CI->eraseFromParent();
2154  return;
2155  }
2156 
2157  if (IsX86 && Name == "avx512.mask.store.ss") {
2158  Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2159  UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2160  Mask, false);
2161 
2162  // Remove intrinsic.
2163  CI->eraseFromParent();
2164  return;
2165  }
2166 
2167  if (IsX86 && (Name.startswith("avx512.mask.store"))) {
2168  // "avx512.mask.storeu." or "avx512.mask.store."
2169  bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2170  UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2171  CI->getArgOperand(2), Aligned);
2172 
2173  // Remove intrinsic.
2174  CI->eraseFromParent();
2175  return;
2176  }
2177 
2178  Value *Rep;
2179  // Upgrade packed integer vector compare intrinsics to compare instructions.
2180  if (IsX86 && (Name.startswith("sse2.pcmp") ||
2181  Name.startswith("avx2.pcmp"))) {
2182  // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2183  bool CmpEq = Name[9] == 'e';
2184  Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2185  CI->getArgOperand(0), CI->getArgOperand(1));
2186  Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2187  } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
2188  Type *ExtTy = Type::getInt32Ty(C);
2189  if (CI->getOperand(0)->getType()->isIntegerTy(8))
2190  ExtTy = Type::getInt64Ty(C);
2191  unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2192  ExtTy->getPrimitiveSizeInBits();
2193  Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2194  Rep = Builder.CreateVectorSplat(NumElts, Rep);
2195  } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2196  Name == "sse2.sqrt.sd")) {
2197  Value *Vec = CI->getArgOperand(0);
2198  Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2199  Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2200  Intrinsic::sqrt, Elt0->getType());
2201  Elt0 = Builder.CreateCall(Intr, Elt0);
2202  Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2203  } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
2204  Name.startswith("sse2.sqrt.p") ||
2205  Name.startswith("sse.sqrt.p"))) {
2206  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2207  Intrinsic::sqrt,
2208  CI->getType()),
2209  {CI->getArgOperand(0)});
2210  } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
2211  if (CI->arg_size() == 4 &&
2212  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2213  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2214  Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2215  : Intrinsic::x86_avx512_sqrt_pd_512;
2216 
2217  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
2218  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2219  IID), Args);
2220  } else {
2221  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2222  Intrinsic::sqrt,
2223  CI->getType()),
2224  {CI->getArgOperand(0)});
2225  }
2226  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2227  CI->getArgOperand(1));
2228  } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
2229  Name.startswith("avx512.ptestnm"))) {
2230  Value *Op0 = CI->getArgOperand(0);
2231  Value *Op1 = CI->getArgOperand(1);
2232  Value *Mask = CI->getArgOperand(2);
2233  Rep = Builder.CreateAnd(Op0, Op1);
2234  llvm::Type *Ty = Op0->getType();
2235  Value *Zero = llvm::Constant::getNullValue(Ty);
2236  ICmpInst::Predicate Pred =
2237  Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2238  Rep = Builder.CreateICmp(Pred, Rep, Zero);
2239  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
2240  } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
2241  unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2242  ->getNumElements();
2243  Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2244  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2245  CI->getArgOperand(1));
2246  } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
2247  unsigned NumElts = CI->getType()->getScalarSizeInBits();
2248  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2249  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2250  int Indices[64];
2251  for (unsigned i = 0; i != NumElts; ++i)
2252  Indices[i] = i;
2253 
2254  // First extract half of each vector. This gives better codegen than
2255  // doing it in a single shuffle.
2256  LHS = Builder.CreateShuffleVector(LHS, LHS,
2257  makeArrayRef(Indices, NumElts / 2));
2258  RHS = Builder.CreateShuffleVector(RHS, RHS,
2259  makeArrayRef(Indices, NumElts / 2));
2260  // Concat the vectors.
2261  // NOTE: Operands have to be swapped to match intrinsic definition.
2262  Rep = Builder.CreateShuffleVector(RHS, LHS,
2263  makeArrayRef(Indices, NumElts));
2264  Rep = Builder.CreateBitCast(Rep, CI->getType());
2265  } else if (IsX86 && Name == "avx512.kand.w") {
2266  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2267  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2268  Rep = Builder.CreateAnd(LHS, RHS);
2269  Rep = Builder.CreateBitCast(Rep, CI->getType());
2270  } else if (IsX86 && Name == "avx512.kandn.w") {
2271  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2272  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2273  LHS = Builder.CreateNot(LHS);
2274  Rep = Builder.CreateAnd(LHS, RHS);
2275  Rep = Builder.CreateBitCast(Rep, CI->getType());
2276  } else if (IsX86 && Name == "avx512.kor.w") {
2277  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2278  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2279  Rep = Builder.CreateOr(LHS, RHS);
2280  Rep = Builder.CreateBitCast(Rep, CI->getType());
2281  } else if (IsX86 && Name == "avx512.kxor.w") {
2282  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2283  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2284  Rep = Builder.CreateXor(LHS, RHS);
2285  Rep = Builder.CreateBitCast(Rep, CI->getType());
2286  } else if (IsX86 && Name == "avx512.kxnor.w") {
2287  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2288  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2289  LHS = Builder.CreateNot(LHS);
2290  Rep = Builder.CreateXor(LHS, RHS);
2291  Rep = Builder.CreateBitCast(Rep, CI->getType());
2292  } else if (IsX86 && Name == "avx512.knot.w") {
2293  Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2294  Rep = Builder.CreateNot(Rep);
2295  Rep = Builder.CreateBitCast(Rep, CI->getType());
2296  } else if (IsX86 &&
2297  (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2298  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2299  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2300  Rep = Builder.CreateOr(LHS, RHS);
2301  Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2302  Value *C;
2303  if (Name[14] == 'c')
2304  C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2305  else
2306  C = ConstantInt::getNullValue(Builder.getInt16Ty());
2307  Rep = Builder.CreateICmpEQ(Rep, C);
2308  Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2309  } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2310  Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2311  Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2312  Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2313  Type *I32Ty = Type::getInt32Ty(C);
2314  Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2315  ConstantInt::get(I32Ty, 0));
2316  Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2317  ConstantInt::get(I32Ty, 0));
2318  Value *EltOp;
2319  if (Name.contains(".add."))
2320  EltOp = Builder.CreateFAdd(Elt0, Elt1);
2321  else if (Name.contains(".sub."))
2322  EltOp = Builder.CreateFSub(Elt0, Elt1);
2323  else if (Name.contains(".mul."))
2324  EltOp = Builder.CreateFMul(Elt0, Elt1);
2325  else
2326  EltOp = Builder.CreateFDiv(Elt0, Elt1);
2327  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2328  ConstantInt::get(I32Ty, 0));
2329  } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
2330  // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2331  bool CmpEq = Name[16] == 'e';
2332  Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2333  } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
2334  Type *OpTy = CI->getArgOperand(0)->getType();
2335  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2336  Intrinsic::ID IID;
2337  switch (VecWidth) {
2338  default: llvm_unreachable("Unexpected intrinsic");
2339  case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2340  case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2341  case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2342  }
2343 
2344  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2345  { CI->getOperand(0), CI->getArgOperand(1) });
2346  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2347  } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
2348  Type *OpTy = CI->getArgOperand(0)->getType();
2349  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2350  unsigned EltWidth = OpTy->getScalarSizeInBits();
2351  Intrinsic::ID IID;
2352  if (VecWidth == 128 && EltWidth == 32)
2353  IID = Intrinsic::x86_avx512_fpclass_ps_128;
2354  else if (VecWidth == 256 && EltWidth == 32)
2355  IID = Intrinsic::x86_avx512_fpclass_ps_256;
2356  else if (VecWidth == 512 && EltWidth == 32)
2357  IID = Intrinsic::x86_avx512_fpclass_ps_512;
2358  else if (VecWidth == 128 && EltWidth == 64)
2359  IID = Intrinsic::x86_avx512_fpclass_pd_128;
2360  else if (VecWidth == 256 && EltWidth == 64)
2361  IID = Intrinsic::x86_avx512_fpclass_pd_256;
2362  else if (VecWidth == 512 && EltWidth == 64)
2363  IID = Intrinsic::x86_avx512_fpclass_pd_512;
2364  else
2365  llvm_unreachable("Unexpected intrinsic");
2366 
2367  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2368  { CI->getOperand(0), CI->getArgOperand(1) });
2369  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2370  } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
2371  SmallVector<Value *, 4> Args(CI->args());
2372  Type *OpTy = Args[0]->getType();
2373  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2374  unsigned EltWidth = OpTy->getScalarSizeInBits();
2375  Intrinsic::ID IID;
2376  if (VecWidth == 128 && EltWidth == 32)
2377  IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2378  else if (VecWidth == 256 && EltWidth == 32)
2379  IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2380  else if (VecWidth == 512 && EltWidth == 32)
2381  IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2382  else if (VecWidth == 128 && EltWidth == 64)
2383  IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2384  else if (VecWidth == 256 && EltWidth == 64)
2385  IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2386  else if (VecWidth == 512 && EltWidth == 64)
2387  IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2388  else
2389  llvm_unreachable("Unexpected intrinsic");
2390 
2392  if (VecWidth == 512)
2393  std::swap(Mask, Args.back());
2394  Args.push_back(Mask);
2395 
2396  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2397  Args);
2398  } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
2399  // Integer compare intrinsics.
2400  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2401  Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2402  } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2403  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2404  Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2405  } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2406  Name.startswith("avx512.cvtw2mask.") ||
2407  Name.startswith("avx512.cvtd2mask.") ||
2408  Name.startswith("avx512.cvtq2mask."))) {
2409  Value *Op = CI->getArgOperand(0);
2410  Value *Zero = llvm::Constant::getNullValue(Op->getType());
2411  Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2412  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2413  } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2414  Name == "ssse3.pabs.w.128" ||
2415  Name == "ssse3.pabs.d.128" ||
2416  Name.startswith("avx2.pabs") ||
2417  Name.startswith("avx512.mask.pabs"))) {
2418  Rep = upgradeAbs(Builder, *CI);
2419  } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2420  Name == "sse2.pmaxs.w" ||
2421  Name == "sse41.pmaxsd" ||
2422  Name.startswith("avx2.pmaxs") ||
2423  Name.startswith("avx512.mask.pmaxs"))) {
2424  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2425  } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2426  Name == "sse41.pmaxuw" ||
2427  Name == "sse41.pmaxud" ||
2428  Name.startswith("avx2.pmaxu") ||
2429  Name.startswith("avx512.mask.pmaxu"))) {
2430  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2431  } else if (IsX86 && (Name == "sse41.pminsb" ||
2432  Name == "sse2.pmins.w" ||
2433  Name == "sse41.pminsd" ||
2434  Name.startswith("avx2.pmins") ||
2435  Name.startswith("avx512.mask.pmins"))) {
2436  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2437  } else if (IsX86 && (Name == "sse2.pminu.b" ||
2438  Name == "sse41.pminuw" ||
2439  Name == "sse41.pminud" ||
2440  Name.startswith("avx2.pminu") ||
2441  Name.startswith("avx512.mask.pminu"))) {
2442  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2443  } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2444  Name == "avx2.pmulu.dq" ||
2445  Name == "avx512.pmulu.dq.512" ||
2446  Name.startswith("avx512.mask.pmulu.dq."))) {
2447  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2448  } else if (IsX86 && (Name == "sse41.pmuldq" ||
2449  Name == "avx2.pmul.dq" ||
2450  Name == "avx512.pmul.dq.512" ||
2451  Name.startswith("avx512.mask.pmul.dq."))) {
2452  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2453  } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2454  Name == "sse2.cvtsi2sd" ||
2455  Name == "sse.cvtsi642ss" ||
2456  Name == "sse2.cvtsi642sd")) {
2457  Rep = Builder.CreateSIToFP(
2458  CI->getArgOperand(1),
2459  cast<VectorType>(CI->getType())->getElementType());
2460  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2461  } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2462  Rep = Builder.CreateUIToFP(
2463  CI->getArgOperand(1),
2464  cast<VectorType>(CI->getType())->getElementType());
2465  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2466  } else if (IsX86 && Name == "sse2.cvtss2sd") {
2467  Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2468  Rep = Builder.CreateFPExt(
2469  Rep, cast<VectorType>(CI->getType())->getElementType());
2470  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2471  } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2472  Name == "sse2.cvtdq2ps" ||
2473  Name == "avx.cvtdq2.pd.256" ||
2474  Name == "avx.cvtdq2.ps.256" ||
2475  Name.startswith("avx512.mask.cvtdq2pd.") ||
2476  Name.startswith("avx512.mask.cvtudq2pd.") ||
2477  Name.startswith("avx512.mask.cvtdq2ps.") ||
2478  Name.startswith("avx512.mask.cvtudq2ps.") ||
2479  Name.startswith("avx512.mask.cvtqq2pd.") ||
2480  Name.startswith("avx512.mask.cvtuqq2pd.") ||
2481  Name == "avx512.mask.cvtqq2ps.256" ||
2482  Name == "avx512.mask.cvtqq2ps.512" ||
2483  Name == "avx512.mask.cvtuqq2ps.256" ||
2484  Name == "avx512.mask.cvtuqq2ps.512" ||
2485  Name == "sse2.cvtps2pd" ||
2486  Name == "avx.cvt.ps2.pd.256" ||
2487  Name == "avx512.mask.cvtps2pd.128" ||
2488  Name == "avx512.mask.cvtps2pd.256")) {
2489  auto *DstTy = cast<FixedVectorType>(CI->getType());
2490  Rep = CI->getArgOperand(0);
2491  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2492 
2493  unsigned NumDstElts = DstTy->getNumElements();
2494  if (NumDstElts < SrcTy->getNumElements()) {
2495  assert(NumDstElts == 2 && "Unexpected vector size");
2496  Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2497  }
2498 
2499  bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2500  bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2501  if (IsPS2PD)
2502  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2503  else if (CI->arg_size() == 4 &&
2504  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2505  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2506  Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2507  : Intrinsic::x86_avx512_sitofp_round;
2508  Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2509  { DstTy, SrcTy });
2510  Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2511  } else {
2512  Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2513  : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2514  }
2515 
2516  if (CI->arg_size() >= 3)
2517  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2518  CI->getArgOperand(1));
2519  } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2520  Name.startswith("vcvtph2ps."))) {
2521  auto *DstTy = cast<FixedVectorType>(CI->getType());
2522  Rep = CI->getArgOperand(0);
2523  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2524  unsigned NumDstElts = DstTy->getNumElements();
2525  if (NumDstElts != SrcTy->getNumElements()) {
2526  assert(NumDstElts == 4 && "Unexpected vector size");
2527  Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2528  }
2529  Rep = Builder.CreateBitCast(
2530  Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2531  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2532  if (CI->arg_size() >= 3)
2533  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2534  CI->getArgOperand(1));
2535  } else if (IsX86 && Name.startswith("avx512.mask.load")) {
2536  // "avx512.mask.loadu." or "avx512.mask.load."
2537  bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2538  Rep =
2539  UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2540  CI->getArgOperand(2), Aligned);
2541  } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2542  auto *ResultTy = cast<FixedVectorType>(CI->getType());
2543  Type *PtrTy = ResultTy->getElementType();
2544 
2545  // Cast the pointer to element type.
2546  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2547  llvm::PointerType::getUnqual(PtrTy));
2548 
2549  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2550  ResultTy->getNumElements());
2551 
2552  Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2553  Intrinsic::masked_expandload,
2554  ResultTy);
2555  Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2556  } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2557  auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2558  Type *PtrTy = ResultTy->getElementType();
2559 
2560  // Cast the pointer to element type.
2561  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2562  llvm::PointerType::getUnqual(PtrTy));
2563 
2564  Value *MaskVec =
2565  getX86MaskVec(Builder, CI->getArgOperand(2),
2566  cast<FixedVectorType>(ResultTy)->getNumElements());
2567 
2568  Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2569  Intrinsic::masked_compressstore,
2570  ResultTy);
2571  Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2572  } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2573  Name.startswith("avx512.mask.expand."))) {
2574  auto *ResultTy = cast<FixedVectorType>(CI->getType());
2575 
2576  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2577  ResultTy->getNumElements());
2578 
2579  bool IsCompress = Name[12] == 'c';
2580  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2581  : Intrinsic::x86_avx512_mask_expand;
2582  Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2583  Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2584  MaskVec });
2585  } else if (IsX86 && Name.startswith("xop.vpcom")) {
2586  bool IsSigned;
2587  if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2588  Name.endswith("uq"))
2589  IsSigned = false;
2590  else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2591  Name.endswith("q"))
2592  IsSigned = true;
2593  else
2594  llvm_unreachable("Unknown suffix");
2595 
2596  unsigned Imm;
2597  if (CI->arg_size() == 3) {
2598  Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2599  } else {
2600  Name = Name.substr(9); // strip off "xop.vpcom"
2601  if (Name.startswith("lt"))
2602  Imm = 0;
2603  else if (Name.startswith("le"))
2604  Imm = 1;
2605  else if (Name.startswith("gt"))
2606  Imm = 2;
2607  else if (Name.startswith("ge"))
2608  Imm = 3;
2609  else if (Name.startswith("eq"))
2610  Imm = 4;
2611  else if (Name.startswith("ne"))
2612  Imm = 5;
2613  else if (Name.startswith("false"))
2614  Imm = 6;
2615  else if (Name.startswith("true"))
2616  Imm = 7;
2617  else
2618  llvm_unreachable("Unknown condition");
2619  }
2620 
2621  Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2622  } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2623  Value *Sel = CI->getArgOperand(2);
2624  Value *NotSel = Builder.CreateNot(Sel);
2625  Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2626  Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2627  Rep = Builder.CreateOr(Sel0, Sel1);
2628  } else if (IsX86 && (Name.startswith("xop.vprot") ||
2629  Name.startswith("avx512.prol") ||
2630  Name.startswith("avx512.mask.prol"))) {
2631  Rep = upgradeX86Rotate(Builder, *CI, false);
2632  } else if (IsX86 && (Name.startswith("avx512.pror") ||
2633  Name.startswith("avx512.mask.pror"))) {
2634  Rep = upgradeX86Rotate(Builder, *CI, true);
2635  } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2636  Name.startswith("avx512.mask.vpshld") ||
2637  Name.startswith("avx512.maskz.vpshld"))) {
2638  bool ZeroMask = Name[11] == 'z';
2639  Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2640  } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2641  Name.startswith("avx512.mask.vpshrd") ||
2642  Name.startswith("avx512.maskz.vpshrd"))) {
2643  bool ZeroMask = Name[11] == 'z';
2644  Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2645  } else if (IsX86 && Name == "sse42.crc32.64.8") {
2646  Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2647  Intrinsic::x86_sse42_crc32_32_8);
2648  Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2649  Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2650  Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2651  } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2652  Name.startswith("avx512.vbroadcast.s"))) {
2653  // Replace broadcasts with a series of insertelements.
2654  auto *VecTy = cast<FixedVectorType>(CI->getType());
2655  Type *EltTy = VecTy->getElementType();
2656  unsigned EltNum = VecTy->getNumElements();
2657  Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2658  EltTy->getPointerTo());
2659  Value *Load = Builder.CreateLoad(EltTy, Cast);
2660  Type *I32Ty = Type::getInt32Ty(C);
2661  Rep = PoisonValue::get(VecTy);
2662  for (unsigned I = 0; I < EltNum; ++I)
2663  Rep = Builder.CreateInsertElement(Rep, Load,
2664  ConstantInt::get(I32Ty, I));
2665  } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2666  Name.startswith("sse41.pmovzx") ||
2667  Name.startswith("avx2.pmovsx") ||
2668  Name.startswith("avx2.pmovzx") ||
2669  Name.startswith("avx512.mask.pmovsx") ||
2670  Name.startswith("avx512.mask.pmovzx"))) {
2671  auto *DstTy = cast<FixedVectorType>(CI->getType());
2672  unsigned NumDstElts = DstTy->getNumElements();
2673 
2674  // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2675  SmallVector<int, 8> ShuffleMask(NumDstElts);
2676  for (unsigned i = 0; i != NumDstElts; ++i)
2677  ShuffleMask[i] = i;
2678 
2679  Value *SV =
2680  Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2681 
2682  bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2683  Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2684  : Builder.CreateZExt(SV, DstTy);
2685  // If there are 3 arguments, it's a masked intrinsic so we need a select.
2686  if (CI->arg_size() == 3)
2687  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2688  CI->getArgOperand(1));
2689  } else if (Name == "avx512.mask.pmov.qd.256" ||
2690  Name == "avx512.mask.pmov.qd.512" ||
2691  Name == "avx512.mask.pmov.wb.256" ||
2692  Name == "avx512.mask.pmov.wb.512") {
2693  Type *Ty = CI->getArgOperand(1)->getType();
2694  Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2695  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2696  CI->getArgOperand(1));
2697  } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2698  Name == "avx2.vbroadcasti128")) {
2699  // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2700  Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2701  unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2702  auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2703  Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2704  PointerType::getUnqual(VT));
2705  Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2706  if (NumSrcElts == 2)
2707  Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2708  else
2709  Rep = Builder.CreateShuffleVector(
2710  Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2711  } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2712  Name.startswith("avx512.mask.shuf.f"))) {
2713  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2714  Type *VT = CI->getType();
2715  unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2716  unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2717  unsigned ControlBitsMask = NumLanes - 1;
2718  unsigned NumControlBits = NumLanes / 2;
2719  SmallVector<int, 8> ShuffleMask(0);
2720 
2721  for (unsigned l = 0; l != NumLanes; ++l) {
2722  unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2723  // We actually need the other source.
2724  if (l >= NumLanes / 2)
2725  LaneMask += NumLanes;
2726  for (unsigned i = 0; i != NumElementsInLane; ++i)
2727  ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2728  }
2729  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2730  CI->getArgOperand(1), ShuffleMask);
2731  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2732  CI->getArgOperand(3));
2733  } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2734  Name.startswith("avx512.mask.broadcasti"))) {
2735  unsigned NumSrcElts =
2736  cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2737  ->getNumElements();
2738  unsigned NumDstElts =
2739  cast<FixedVectorType>(CI->getType())->getNumElements();
2740 
2741  SmallVector<int, 8> ShuffleMask(NumDstElts);
2742  for (unsigned i = 0; i != NumDstElts; ++i)
2743  ShuffleMask[i] = i % NumSrcElts;
2744 
2745  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2746  CI->getArgOperand(0),
2747  ShuffleMask);
2748  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2749  CI->getArgOperand(1));
2750  } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2751  Name.startswith("avx2.vbroadcast") ||
2752  Name.startswith("avx512.pbroadcast") ||
2753  Name.startswith("avx512.mask.broadcast.s"))) {
2754  // Replace vp?broadcasts with a vector shuffle.
2755  Value *Op = CI->getArgOperand(0);
2756  ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2757  Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2758  SmallVector<int, 8> M;
2759  ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
2760  Rep = Builder.CreateShuffleVector(Op, M);
2761 
2762  if (CI->arg_size() == 3)
2763  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2764  CI->getArgOperand(1));
2765  } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2766  Name.startswith("avx2.padds.") ||
2767  Name.startswith("avx512.padds.") ||
2768  Name.startswith("avx512.mask.padds."))) {
2769  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2770  } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
2771  Name.startswith("avx2.psubs.") ||
2772  Name.startswith("avx512.psubs.") ||
2773  Name.startswith("avx512.mask.psubs."))) {
2774  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2775  } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2776  Name.startswith("avx2.paddus.") ||
2777  Name.startswith("avx512.mask.paddus."))) {
2778  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2779  } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
2780  Name.startswith("avx2.psubus.") ||
2781  Name.startswith("avx512.mask.psubus."))) {
2782  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2783  } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2784  Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2785  CI->getArgOperand(1),
2786  CI->getArgOperand(2),
2787  CI->getArgOperand(3),
2788  CI->getArgOperand(4),
2789  false);
2790  } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2791  Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2792  CI->getArgOperand(1),
2793  CI->getArgOperand(2),
2794  CI->getArgOperand(3),
2795  CI->getArgOperand(4),
2796  true);
2797  } else if (IsX86 && (Name == "sse2.psll.dq" ||
2798  Name == "avx2.psll.dq")) {
2799  // 128/256-bit shift left specified in bits.
2800  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2801  Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2802  Shift / 8); // Shift is in bits.
2803  } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2804  Name == "avx2.psrl.dq")) {
2805  // 128/256-bit shift right specified in bits.
2806  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2807  Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2808  Shift / 8); // Shift is in bits.
2809  } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2810  Name == "avx2.psll.dq.bs" ||
2811  Name == "avx512.psll.dq.512")) {
2812  // 128/256/512-bit shift left specified in bytes.
2813  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2814  Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2815  } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2816  Name == "avx2.psrl.dq.bs" ||
2817  Name == "avx512.psrl.dq.512")) {
2818  // 128/256/512-bit shift right specified in bytes.
2819  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2820  Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2821  } else if (IsX86 && (Name == "sse41.pblendw" ||
2822  Name.startswith("sse41.blendp") ||
2823  Name.startswith("avx.blend.p") ||
2824  Name == "avx2.pblendw" ||
2825  Name.startswith("avx2.pblendd."))) {
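  // Each immediate bit (repeating every 8 elements) picks the element from
  // Op1 when set and from Op0 when clear. For example, sse41.pblendw with
  // Imm = 0xF0 produces the shuffle mask <0,1,2,3,12,13,14,15>.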
2826  Value *Op0 = CI->getArgOperand(0);
2827  Value *Op1 = CI->getArgOperand(1);
2828  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2829  auto *VecTy = cast<FixedVectorType>(CI->getType());
2830  unsigned NumElts = VecTy->getNumElements();
2831 
2832  SmallVector<int, 16> Idxs(NumElts);
2833  for (unsigned i = 0; i != NumElts; ++i)
2834  Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2835 
2836  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2837  } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2838  Name == "avx2.vinserti128" ||
2839  Name.startswith("avx512.mask.insert"))) {
2840  Value *Op0 = CI->getArgOperand(0);
2841  Value *Op1 = CI->getArgOperand(1);
2842  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2843  unsigned DstNumElts =
2844  cast<FixedVectorType>(CI->getType())->getNumElements();
2845  unsigned SrcNumElts =
2846  cast<FixedVectorType>(Op1->getType())->getNumElements();
2847  unsigned Scale = DstNumElts / SrcNumElts;
2848 
2849  // Mask off the high bits of the immediate value; hardware ignores those.
2850  Imm = Imm % Scale;
2851 
2852  // Extend the second operand into a vector the size of the destination.
2853  SmallVector<int, 8> Idxs(DstNumElts);
2854  for (unsigned i = 0; i != SrcNumElts; ++i)
2855  Idxs[i] = i;
2856  for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2857  Idxs[i] = SrcNumElts;
2858  Rep = Builder.CreateShuffleVector(Op1, Idxs);
2859 
2860  // Insert the second operand into the first operand.
2861 
2862  // Note that there is no guarantee that instruction lowering will actually
2863  // produce a vinsertf128 instruction for the created shuffles. In
2864  // particular, the 0 immediate case involves no lane changes, so it can
2865  // be handled as a blend.
2866 
2867  // Example of shuffle mask for 32-bit elements:
2868  // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2869  // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2870 
2871  // First fill with identity mask.
2872  for (unsigned i = 0; i != DstNumElts; ++i)
2873  Idxs[i] = i;
2874  // Then replace the elements where we need to insert.
2875  for (unsigned i = 0; i != SrcNumElts; ++i)
2876  Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2877  Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2878 
2879  // If the intrinsic has a mask operand, handle that.
2880  if (CI->arg_size() == 5)
2881  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2882  CI->getArgOperand(3));
2883  } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2884  Name == "avx2.vextracti128" ||
2885  Name.startswith("avx512.mask.vextract"))) {
2886  Value *Op0 = CI->getArgOperand(0);
2887  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2888  unsigned DstNumElts =
2889  cast<FixedVectorType>(CI->getType())->getNumElements();
2890  unsigned SrcNumElts =
2891  cast<FixedVectorType>(Op0->getType())->getNumElements();
2892  unsigned Scale = SrcNumElts / DstNumElts;
2893 
2894  // Mask off the high bits of the immediate value; hardware ignores those.
2895  Imm = Imm % Scale;
2896 
2897  // Get indexes for the subvector of the input vector.
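  // For example, extracting the upper half (Imm = 1) of a <8 x float> source
  // into a <4 x float> result uses indices <4,5,6,7>.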
2898  SmallVector<int, 8> Idxs(DstNumElts);
2899  for (unsigned i = 0; i != DstNumElts; ++i) {
2900  Idxs[i] = i + (Imm * DstNumElts);
2901  }
2902  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2903 
2904  // If the intrinsic has a mask operand, handle that.
2905  if (CI->arg_size() == 4)
2906  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2907  CI->getArgOperand(2));
2908  } else if (!IsX86 && Name == "stackprotectorcheck") {
2909  Rep = nullptr;
2910  } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2911  Name.startswith("avx512.mask.perm.di."))) {
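  // Each element's 2-bit field of the immediate selects an element within its
  // group of four. For example, Imm = 0x1B reverses each group, giving
  // <3,2,1,0,7,6,5,4> for an 8-element vector.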
2912  Value *Op0 = CI->getArgOperand(0);
2913  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2914  auto *VecTy = cast<FixedVectorType>(CI->getType());
2915  unsigned NumElts = VecTy->getNumElements();
2916 
2917  SmallVector<int, 8> Idxs(NumElts);
2918  for (unsigned i = 0; i != NumElts; ++i)
2919  Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2920 
2921  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2922 
2923  if (CI->arg_size() == 4)
2924  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2925  CI->getArgOperand(2));
2926  } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2927  Name == "avx2.vperm2i128")) {
2928  // The immediate permute control byte looks like this:
2929  // [1:0] - select 128 bits from sources for low half of destination
2930  // [2] - ignore
2931  // [3] - zero low half of destination
2932  // [5:4] - select 128 bits from sources for high half of destination
2933  // [6] - ignore
2934  // [7] - zero high half of destination
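  // For example, Imm = 0x31 selects the high half of each source: the low
  // half of the result comes from the first operand's high 128 bits and the
  // high half from the second operand's high 128 bits.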
2935 
2936  uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2937 
2938  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2939  unsigned HalfSize = NumElts / 2;
2940  SmallVector<int, 8> ShuffleMask(NumElts);
2941 
2942  // Determine which operand(s) are actually in use for this instruction.
2943  Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2944  Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2945 
2946  // If needed, replace operands based on zero mask.
2947  V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2948  V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2949 
2950  // Permute low half of result.
2951  unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2952  for (unsigned i = 0; i < HalfSize; ++i)
2953  ShuffleMask[i] = StartIndex + i;
2954 
2955  // Permute high half of result.
2956  StartIndex = (Imm & 0x10) ? HalfSize : 0;
2957  for (unsigned i = 0; i < HalfSize; ++i)
2958  ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2959 
2960  Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2961 
2962  } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2963  Name == "sse2.pshuf.d" ||
2964  Name.startswith("avx512.mask.vpermil.p") ||
2965  Name.startswith("avx512.mask.pshuf.d."))) {
2966  Value *Op0 = CI->getArgOperand(0);
2967  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2968  auto *VecTy = cast<FixedVectorType>(CI->getType());
2969  unsigned NumElts = VecTy->getNumElements();
2970  // Calculate the size of each index in the immediate.
2971  unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2972  unsigned IdxMask = ((1 << IdxSize) - 1);
2973 
2974  SmallVector<int, 8> Idxs(NumElts);
2975  // Look up the bits for this element, wrapping around the immediate every
2976  // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2977  // to offset by the first index of each group.
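  // For example, sse2.pshuf.d (32-bit elements, IdxSize == 2) with
  // Imm = 0x1B produces <3,2,1,0>, i.e. a full reversal of the vector.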
2978  for (unsigned i = 0; i != NumElts; ++i)
2979  Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2980 
2981  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2982 
2983  if (CI->arg_size() == 4)
2984  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2985  CI->getArgOperand(2));
2986  } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2987  Name.startswith("avx512.mask.pshufl.w."))) {
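  // pshuflw permutes the low four words of each 128-bit lane by the
  // immediate and passes the high four words through. For example,
  // Imm = 0x1B gives <3,2,1,0,4,5,6,7> per lane.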
2988  Value *Op0 = CI->getArgOperand(0);
2989  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2990  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2991 
2992  SmallVector<int, 16> Idxs(NumElts);
2993  for (unsigned l = 0; l != NumElts; l += 8) {
2994  for (unsigned i = 0; i != 4; ++i)
2995  Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2996  for (unsigned i = 4; i != 8; ++i)
2997  Idxs[i + l] = i + l;
2998  }
2999 
3000  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3001 
3002  if (CI->arg_size() == 4)
3003  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3004  CI->getArgOperand(2));
3005  } else if (IsX86 && (Name == "sse2.pshufh.w" ||
3006  Name.startswith("avx512.mask.pshufh.w."))) {
3007  Value *Op0 = CI->getArgOperand(0);
3008  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3009  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3010 
3011  SmallVector<int, 16> Idxs(NumElts);
3012  for (unsigned l = 0; l != NumElts; l += 8) {
3013  for (unsigned i = 0; i != 4; ++i)
3014  Idxs[i + l] = i + l;
3015  for (unsigned i = 0; i != 4; ++i)
3016  Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3017  }
3018 
3019  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3020 
3021  if (CI->arg_size() == 4)
3022  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3023  CI->getArgOperand(2));
3024  } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
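  // shufps/shufpd-style select: the low half of each lane is taken from Op0
  // and the high half from Op1, each element chosen by successive immediate
  // bits. For example, a 128-bit shufps with Imm = 0x1B yields the mask
  // <3,2,5,4>.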
3025  Value *Op0 = CI->getArgOperand(0);
3026  Value *Op1 = CI->getArgOperand(1);
3027  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3028  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3029 
3030  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3031  unsigned HalfLaneElts = NumLaneElts / 2;
3032 
3033  SmallVector<int, 16> Idxs(NumElts);
3034  for (unsigned i = 0; i != NumElts; ++i) {
3035  // Base index is the starting element of the lane.
3036  Idxs[i] = i - (i % NumLaneElts);
3037  // If we are half way through the lane switch to the other source.
3038  if ((i % NumLaneElts) >= HalfLaneElts)
3039  Idxs[i] += NumElts;
3040  // Now select the specific element by adding HalfLaneElts bits from the
3041  // immediate, wrapping around the immediate every 8 bits.
3042  Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3043  }
3044 
3045  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3046 
3047  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
3048  CI->getArgOperand(3));
3049  } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
3050  Name.startswith("avx512.mask.movshdup") ||
3051  Name.startswith("avx512.mask.movsldup"))) {
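  // movsldup/movddup duplicate the even (low) element of each pair and
  // movshdup the odd one. For example, on a <4 x float> operand movsldup
  // produces mask <0,0,2,2> and movshdup produces <1,1,3,3>.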
3052  Value *Op0 = CI->getArgOperand(0);
3053  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3054  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3055 
3056  unsigned Offset = 0;
3057  if (Name.startswith("avx512.mask.movshdup."))
3058  Offset = 1;
3059 
3060  SmallVector<int, 16> Idxs(NumElts);
3061  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3062  for (unsigned i = 0; i != NumLaneElts; i += 2) {
3063  Idxs[i + l + 0] = i + l + Offset;
3064  Idxs[i + l + 1] = i + l + Offset;
3065  }
3066 
3067  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3068 
3069  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3070  CI->getArgOperand(1));
3071  } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
3072  Name.startswith("avx512.mask.unpckl."))) {
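  // Interleave the low halves of each 128-bit lane of the two sources. For
  // example, punpckldq on <4 x i32> operands produces the mask <0,4,1,5>.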
3073  Value *Op0 = CI->getArgOperand(0);
3074  Value *Op1 = CI->getArgOperand(1);
3075  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3076  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3077 
3078  SmallVector<int, 64> Idxs(NumElts);
3079  for (int l = 0; l != NumElts; l += NumLaneElts)
3080  for (int i = 0; i != NumLaneElts; ++i)
3081  Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3082 
3083  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3084 
3085  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3086  CI->getArgOperand(2));
3087  } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
3088  Name.startswith("avx512.mask.unpckh."))) {
3089  Value *Op0 = CI->getArgOperand(0);
3090  Value *Op1 = CI->getArgOperand(1);
3091  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3092  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3093 
3094  SmallVector<int, 64> Idxs(NumElts);
3095  for (int l = 0; l != NumElts; l += NumLaneElts)
3096  for (int i = 0; i != NumLaneElts; ++i)
3097  Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3098 
3099  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3100 
3101  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3102  CI->getArgOperand(2));
3103  } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
3104  Name.startswith("avx512.mask.pand."))) {
3105  VectorType *FTy = cast<VectorType>(CI->getType());
3106  VectorType *ITy = VectorType::getInteger(FTy);
3107  Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3108  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3109  Rep = Builder.CreateBitCast(Rep, FTy);
3110  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3111  CI->getArgOperand(2));
3112  } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
3113  Name.startswith("avx512.mask.pandn."))) {
3114  VectorType *FTy = cast<VectorType>(CI->getType());
3115  VectorType *ITy = VectorType::getInteger(FTy);
3116  Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3117  Rep = Builder.CreateAnd(Rep,
3118  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3119  Rep = Builder.CreateBitCast(Rep, FTy);
3120  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3121  CI->getArgOperand(2));
3122  } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
3123  Name.startswith("avx512.mask.por."))) {
3124  VectorType *FTy = cast<VectorType>(CI->getType());
3125  VectorType *ITy = VectorType::getInteger(FTy);
3126  Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3127  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3128  Rep = Builder.CreateBitCast(Rep, FTy);
3129  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3130  CI->getArgOperand(2));
3131  } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
3132  Name.startswith("avx512.mask.pxor."))) {
3133  VectorType *FTy = cast<VectorType>(CI->getType());
3134  VectorType *ITy = VectorType::getInteger(FTy);
3135  Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3136  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3137  Rep = Builder.CreateBitCast(Rep, FTy);
3138  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3139  CI->getArgOperand(2));
3140  } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
3141  Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3142  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3143  CI->getArgOperand(2));
3144  } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
3145  Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3146  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3147  CI->getArgOperand(2));
3148  } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
3149  Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3150  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3151  CI->getArgOperand(2));
3152  } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
3153  if (Name.endswith(".512")) {
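  // Only the 512-bit forms carry a rounding-mode operand (operand 4).
  // Name[17] is the element-type character, e.g. "avx512.mask.add.ps.512"[17]
  // is 's'.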
3154  Intrinsic::ID IID;
3155  if (Name[17] == 's')
3156  IID = Intrinsic::x86_avx512_add_ps_512;
3157  else
3158  IID = Intrinsic::x86_avx512_add_pd_512;
3159 
3160  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3161  { CI->getArgOperand(0), CI->getArgOperand(1),
3162  CI->getArgOperand(4) });
3163  } else {
3164  Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3165  }
3166  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3167  CI->getArgOperand(2));
3168  } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
3169  if (Name.endswith(".512")) {
3170  Intrinsic::ID IID;
3171  if (Name[17] == 's')
3172  IID = Intrinsic::x86_avx512_div_ps_512;
3173  else
3174  IID = Intrinsic::x86_avx512_div_pd_512;
3175 
3176  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3177  { CI->getArgOperand(0), CI->getArgOperand(1),
3178  CI->getArgOperand(4) });
3179  } else {
3180  Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3181  }
3182  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3183  CI->getArgOperand(2));
3184  } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
3185  if (Name.endswith(".512")) {
3186  Intrinsic::ID IID;
3187  if (Name[17] == 's')
3188  IID = Intrinsic::x86_avx512_mul_ps_512;
3189  else
3190  IID = Intrinsic::x86_avx512_mul_pd_512;
3191 
3192  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3193  { CI->getArgOperand(0), CI->getArgOperand(1),
3194  CI->getArgOperand(4) });
3195  } else {
3196  Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3197  }
3198  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3199  CI->getArgOperand(2));
3200  } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
3201  if (Name.endswith(".512")) {
3202  Intrinsic::ID IID;
3203  if (Name[17] == 's')
3204  IID = Intrinsic::x86_avx512_sub_ps_512;
3205  else
3206  IID = Intrinsic::x86_avx512_sub_pd_512;
3207 
3208  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3209  { CI->getArgOperand(0), CI->getArgOperand(1),
3210  CI->getArgOperand(4) });
3211  } else {
3212  Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3213  }
3214  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3215  CI->getArgOperand(2));
3216  } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
3217  Name.startswith("avx512.mask.min.p")) &&
3218  Name.drop_front(18) == ".512") {
3219  bool IsDouble = Name[17] == 'd';
3220  bool IsMin = Name[13] == 'i';
3221  static const Intrinsic::ID MinMaxTbl[2][2] = {
3222  { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
3223  { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
3224  };
3225  Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3226 
3227  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3228  { CI->getArgOperand(0), CI->getArgOperand(1),
3229  CI->getArgOperand(4) });
3230  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3231  CI->getArgOperand(2));
3232  } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
3233  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
3234  Intrinsic::ctlz,
3235  CI->getType()),
3236  { CI->getArgOperand(0), Builder.getInt1(false) });
3237  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3238  CI->getArgOperand(1));
3239  } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
3240  bool IsImmediate = Name[16] == 'i' ||
3241  (Name.size() > 18 && Name[18] == 'i');
3242  bool IsVariable = Name[16] == 'v';
3243  char Size = Name[16] == '.' ? Name[17] :
3244  Name[17] == '.' ? Name[18] :
3245  Name[18] == '.' ? Name[19] :
3246  Name[20];
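  // For example, "avx512.mask.psll.d.256" decodes to Size == 'd' with neither
  // the immediate nor the variable form, while "avx512.mask.psll.di.256" is
  // the immediate (pslli) form.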
3247 
3248  Intrinsic::ID IID;
3249  if (IsVariable && Name[17] != '.') {
3250  if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3251  IID = Intrinsic::x86_avx2_psllv_q;
3252  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3253  IID = Intrinsic::x86_avx2_psllv_q_256;
3254  else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3255  IID = Intrinsic::x86_avx2_psllv_d;
3256  else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3257  IID = Intrinsic::x86_avx2_psllv_d_256;
3258  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3259  IID = Intrinsic::x86_avx512_psllv_w_128;
3260  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3261  IID = Intrinsic::x86_avx512_psllv_w_256;
3262  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3263  IID = Intrinsic::x86_avx512_psllv_w_512;
3264  else
3265  llvm_unreachable("Unexpected size");
3266  } else if (Name.endswith(".128")) {
3267  if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3268  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3269  : Intrinsic::x86_sse2_psll_d;
3270  else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3271  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3272  : Intrinsic::x86_sse2_psll_q;
3273  else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3274  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3275  : Intrinsic::x86_sse2_psll_w;
3276  else
3277  llvm_unreachable("Unexpected size");
3278  } else if (Name.endswith(".256")) {
3279  if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3280  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3281  : Intrinsic::x86_avx2_psll_d;
3282  else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3283  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3284  : Intrinsic::x86_avx2_psll_q;
3285  else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3286  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3287  : Intrinsic::x86_avx2_psll_w;
3288  else
3289  llvm_unreachable("Unexpected size");
3290  } else {
3291  if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3292  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3293  IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
3294  Intrinsic::x86_avx512_psll_d_512;
3295  else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3296  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3297  IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
3298  Intrinsic::x86_avx512_psll_q_512;
3299  else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3300  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3301  : Intrinsic::x86_avx512_psll_w_512;
3302  else
3303  llvm_unreachable("Unexpected size");
3304  }
3305 
3306  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3307  } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
3308  bool IsImmediate = Name[16] == 'i' ||
3309  (Name.size() > 18 && Name[18] == 'i');
3310  bool IsVariable = Name[16] == 'v';
3311  char Size = Name[16] == '.' ? Name[17] :
3312  Name[17] == '.' ? Name[18] :
3313  Name[18] == '.' ? Name[19] :
3314  Name[20];
3315 
3316  Intrinsic::ID IID;
3317  if (IsVariable && Name[17] != '.') {
3318  if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3319  IID = Intrinsic::x86_avx2_psrlv_q;
3320  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3321  IID = Intrinsic::x86_avx2_psrlv_q_256;
3322  else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3323  IID = Intrinsic::x86_avx2_psrlv_d;
3324  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3325  IID = Intrinsic::x86_avx2_psrlv_d_256;
3326  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3327  IID = Intrinsic::x86_avx512_psrlv_w_128;
3328  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3329  IID = Intrinsic::x86_avx512_psrlv_w_256;
3330  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3331  IID = Intrinsic::x86_avx512_psrlv_w_512;
3332  else
3333  llvm_unreachable("Unexpected size");
3334  } else if (Name.endswith(".128")) {
3335  if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3336  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3337  : Intrinsic::x86_sse2_psrl_d;
3338  else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3339  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3340  : Intrinsic::x86_sse2_psrl_q;
3341  else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3342  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3343  : Intrinsic::x86_sse2_psrl_w;
3344  else
3345  llvm_unreachable("Unexpected size");
3346  } else if (Name.endswith(".256")) {
3347  if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3348  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3349  : Intrinsic::x86_avx2_psrl_d;
3350  else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3351  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3352  : Intrinsic::x86_avx2_psrl_q;
3353  else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3354  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3355  : Intrinsic::x86_avx2_psrl_w;
3356  else
3357  llvm_unreachable("Unexpected size");
3358  } else {
3359  if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3360  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3361  IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
3362  Intrinsic::x86_avx512_psrl_d_512;
3363  else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3364  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3365  IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
3366  Intrinsic::x86_avx512_psrl_q_512;
3367  else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3368  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3369  : Intrinsic::x86_avx512_psrl_w_512;
3370  else
3371  llvm_unreachable("Unexpected size");
3372  }
3373 
3374  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3375  } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
3376  bool IsImmediate = Name[16] == 'i' ||
3377  (Name.size() > 18 && Name[18] == 'i');
3378  bool IsVariable = Name[16] == 'v';
3379  char Size = Name[16] == '.' ? Name[17] :
3380  Name[17] == '.' ? Name[18] :
3381  Name[18] == '.' ? Name[19] :
3382  Name[20];
3383 
3384  Intrinsic::ID IID;
3385  if (IsVariable && Name[17] != '.') {
3386  if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3387  IID = Intrinsic::x86_avx2_psrav_d;
3388  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3389  IID = Intrinsic::x86_avx2_psrav_d_256;
3390  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3391  IID = Intrinsic::x86_avx512_psrav_w_128;
3392  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3393  IID = Intrinsic::x86_avx512_psrav_w_256;
3394  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3395  IID = Intrinsic::x86_avx512_psrav_w_512;
3396  else
3397  llvm_unreachable("Unexpected size");
3398  } else if (Name.endswith(".128")) {
3399  if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3400  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3401  : Intrinsic::x86_sse2_psra_d;
3402  else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3403  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3404  IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
3405  Intrinsic::x86_avx512_psra_q_128;
3406  else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3407  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3408  : Intrinsic::x86_sse2_psra_w;
3409  else
3410  llvm_unreachable("Unexpected size");
3411  } else if (Name.endswith(".256")) {
3412  if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3413  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3414  : Intrinsic::x86_avx2_psra_d;
3415  else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3416  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3417  IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
3418  Intrinsic::x86_avx512_psra_q_256;
3419  else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3420  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3421  : Intrinsic::x86_avx2_psra_w;
3422  else
3423  llvm_unreachable("Unexpected size");
3424  } else {
3425  if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3426  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3427  IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
3428  Intrinsic::x86_avx512_psra_d_512;
3429  else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3430  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3431  IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
3432  Intrinsic::x86_avx512_psra_q_512;
3433  else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3434  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3435  : Intrinsic::x86_avx512_psra_w_512;
3436  else
3437  llvm_unreachable("Unexpected size");
3438  }
3439 
3440  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3441  } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3442  Rep = upgradeMaskedMove(Builder, *CI);
3443  } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3444  Rep = UpgradeMaskToInt(Builder, *CI);
3445  } else if (IsX86 && Name.endswith(".movntdqa")) {
3446  Module *M = F->getParent();
3447  MDNode *Node = MDNode::get(
3448  C, ConstantAsMetadata::get(Builder.getInt32(1)));
3449 
3450  Value *Ptr = CI->getArgOperand(0);
3451 
3452  // Convert the type of the pointer to a pointer to the stored type.
3453  Value *BC = Builder.CreateBitCast(
3454  Ptr, PointerType::getUnqual(CI->getType()), "cast");
3455  LoadInst *LI = Builder.CreateAlignedLoad(
3456  CI->getType(), BC,
3457  Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3458  LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3459  Rep = LI;
3460  } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3461  Name.startswith("fma.vfmsub.") ||
3462  Name.startswith("fma.vfnmadd.") ||
3463  Name.startswith("fma.vfnmsub."))) {
3464  bool NegMul = Name[6] == 'n';
3465  bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3466  bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
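  // For example, "fma.vfnmsub.sd" has Name[6] == 'n' (negated product),
  // Name[8] == 's' (subtracted accumulator) and Name[12] == 's' (scalar).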
3467 
3468  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3469  CI->getArgOperand(2) };
3470 
3471  if (IsScalar) {
3472  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3473  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3474  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3475  }
3476 
3477  if (NegMul && !IsScalar)
3478  Ops[0] = Builder.CreateFNeg(Ops[0]);
3479  if (NegMul && IsScalar)
3480  Ops[1] = Builder.CreateFNeg(Ops[1]);
3481  if (NegAcc)
3482  Ops[2] = Builder.CreateFNeg(Ops[2]);
3483 
3484  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3485  Intrinsic::fma,
3486  Ops[0]->getType()),
3487  Ops);
3488 
3489  if (IsScalar)
3490  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3491  (uint64_t)0);
3492  } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3493  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3494  CI->getArgOperand(2) };
3495 
3496  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3497  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3498  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3499 
3500  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3501  Intrinsic::fma,
3502  Ops[0]->getType()),
3503  Ops);
3504 
3505  Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3506  Rep, (uint64_t)0);
3507  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3508  Name.startswith("avx512.maskz.vfmadd.s") ||
3509  Name.startswith("avx512.mask3.vfmadd.s") ||
3510  Name.startswith("avx512.mask3.vfmsub.s") ||
3511  Name.startswith("avx512.mask3.vfnmsub.s"))) {
3512  bool IsMask3 = Name[11] == '3';
3513  bool IsMaskZ = Name[11] == 'z';
3514  // Drop the "avx512.mask." to make it easier.
3515  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3516  bool NegMul = Name[2] == 'n';
3517  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3518 
3519  Value *A = CI->getArgOperand(0);
3520  Value *B = CI->getArgOperand(1);
3521  Value *C = CI->getArgOperand(2);
3522 
3523  if (NegMul && (IsMask3 || IsMaskZ))
3524  A = Builder.CreateFNeg(A);
3525  if (NegMul && !(IsMask3 || IsMaskZ))
3526  B = Builder.CreateFNeg(B);
3527  if (NegAcc)
3528  C = Builder.CreateFNeg(C);
3529 
3530  A = Builder.CreateExtractElement(A, (uint64_t)0);
3531  B = Builder.CreateExtractElement(B, (uint64_t)0);
3532  C = Builder.CreateExtractElement(C, (uint64_t)0);
3533 
3534  if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3535  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
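  // A rounding-mode operand other than 4 (CUR_DIRECTION) requires the
  // rounding-aware scalar FMA intrinsic; otherwise plain llvm.fma suffices.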
3536  Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3537 
3538  Intrinsic::ID IID;
3539  if (Name.back() == 'd')
3540  IID = Intrinsic::x86_avx512_vfmadd_f64;
3541  else
3542  IID = Intrinsic::x86_avx512_vfmadd_f32;
3543  Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3544  Rep = Builder.CreateCall(FMA, Ops);
3545  } else {
3546  Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3547  Intrinsic::fma,
3548  A->getType());
3549  Rep = Builder.CreateCall(FMA, { A, B, C });
3550  }
3551 
3552  Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3553  IsMask3 ? C : A;
3554 
3555  // For Mask3 with NegAcc, we need to create a new extractelement that
3556  // avoids the negation above.
3557  if (NegAcc && IsMask3)
3558  PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3559  (uint64_t)0);
3560 
3561  Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3562  Rep, PassThru);
3563  Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3564  Rep, (uint64_t)0);
3565  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3566  Name.startswith("avx512.mask.vfnmadd.p") ||
3567  Name.startswith("avx512.mask.vfnmsub.p") ||
3568  Name.startswith("avx512.mask3.vfmadd.p") ||
3569  Name.startswith("avx512.mask3.vfmsub.p") ||
3570  Name.startswith("avx512.mask3.vfnmsub.p") ||
3571  Name.startswith("avx512.maskz.vfmadd.p"))) {
3572  bool IsMask3 = Name[11] == '3';
3573  bool IsMaskZ = Name[11] == 'z';
3574  // Drop the "avx512.mask." to make it easier.
3575  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3576  bool NegMul = Name[2] == 'n';
3577  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3578 
3579  Value *A = CI->getArgOperand(0);
3580  Value *B = CI->getArgOperand(1);
3581  Value *C = CI->getArgOperand(2);
3582 
3583  if (NegMul && (IsMask3 || IsMaskZ))
3584  A = Builder.CreateFNeg(A);
3585  if (NegMul && !(IsMask3 || IsMaskZ))
3586  B = Builder.CreateFNeg(B);
3587  if (NegAcc)
3588  C = Builder.CreateFNeg(C);
3589 
3590  if (CI->arg_size() == 5 &&
3591  (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3592  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3593  Intrinsic::ID IID;
3594  // Check the character before ".512" in string.
3595  if (Name[Name.size()-5] == 's')
3596  IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3597  else
3598  IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3599 
3600  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3601  { A, B, C, CI->getArgOperand(4) });
3602  } else {
3603  Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3604  Intrinsic::fma,
3605  A->getType());
3606  Rep = Builder.CreateCall(FMA, { A, B, C });
3607  }
3608 
3609  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3610  IsMask3 ? CI->getArgOperand(2) :
3611  CI->getArgOperand(0);
3612 
3613  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3614  } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
3615  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3616  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3617  Intrinsic::ID IID;
3618  if (VecWidth == 128 && EltWidth == 32)
3619  IID = Intrinsic::x86_fma_vfmaddsub_ps;
3620  else if (VecWidth == 256 && EltWidth == 32)
3621  IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3622  else if (VecWidth == 128 && EltWidth == 64)
3623  IID = Intrinsic::x86_fma_vfmaddsub_pd;
3624  else if (VecWidth == 256 && EltWidth == 64)
3625  IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3626  else
3627  llvm_unreachable("Unexpected intrinsic");
3628 
3629  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3630  CI->getArgOperand(2) };
3631  Ops[2] = Builder.CreateFNeg(Ops[2]);
3632  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3633  Ops);
3634  } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3635  Name.startswith("avx512.mask3.vfmaddsub.p") ||
3636  Name.startswith("avx512.maskz.vfmaddsub.p") ||
3637  Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3638  bool IsMask3 = Name[11] == '3';
3639  bool IsMaskZ = Name[11] == 'z';
3640  // Drop the "avx512.mask." to make it easier.
3641  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3642  bool IsSubAdd = Name[3] == 's';
3643  if (CI->arg_size() == 5) {
3644  Intrinsic::ID IID;
3645  // Check the character before ".512" in string.
3646  if (Name[Name.size()-5] == 's')
3647  IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3648  else
3649  IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3650 
3651  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3652  CI->getArgOperand(2), CI->getArgOperand(4) };
3653  if (IsSubAdd)
3654  Ops[2] = Builder.CreateFNeg(Ops[2]);
3655 
3656  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3657  Ops);
3658  } else {
3659  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3660 
3661  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3662  CI->getArgOperand(2) };
3663 
3664  Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3665  Ops[0]->getType());
3666  Value *Odd = Builder.CreateCall(FMA, Ops);
3667  Ops[2] = Builder.CreateFNeg(Ops[2]);
3668  Value *Even = Builder.CreateCall(FMA, Ops);
3669 
3670  if (IsSubAdd)
3671  std::swap(Even, Odd);
3672 
3673  SmallVector<int, 32> Idxs(NumElts);
3674  for (int i = 0; i != NumElts; ++i)
3675  Idxs[i] = i + (i % 2) * NumElts;
3676 
3677  Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3678  }
3679 
3680  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3681  IsMask3 ? CI->getArgOperand(2) :
3682  CI->getArgOperand(0);
3683 
3684  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3685  } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3686  Name.startswith("avx512.maskz.pternlog."))) {
3687  bool ZeroMask = Name[11] == 'z';
3688  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3689  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3690  Intrinsic::ID IID;
3691  if (VecWidth == 128 && EltWidth == 32)
3692  IID = Intrinsic::x86_avx512_pternlog_d_128;
3693  else if (VecWidth == 256 && EltWidth == 32)
3694  IID = Intrinsic::x86_avx512_pternlog_d_256;
3695  else if (VecWidth == 512 && EltWidth == 32)
3696  IID = Intrinsic::x86_avx512_pternlog_d_512;
3697  else if (VecWidth == 128 && EltWidth == 64)
3698  IID = Intrinsic::x86_avx512_pternlog_q_128;
3699  else if (VecWidth == 256 && EltWidth == 64)
3700  IID = Intrinsic::x86_avx512_pternlog_q_256;
3701  else if (VecWidth == 512 && EltWidth == 64)
3702  IID = Intrinsic::x86_avx512_pternlog_q_512;
3703  else
3704  llvm_unreachable("Unexpected intrinsic");
3705 
3706  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3707  CI->getArgOperand(2), CI->getArgOperand(3) };
3708  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3709  Args);
3710  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3711  : CI->getArgOperand(0);
3712  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3713  } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3714  Name.startswith("avx512.maskz.vpmadd52"))) {
3715  bool ZeroMask = Name[11] == 'z';
3716  bool High = Name[20] == 'h' || Name[21] == 'h';
3717  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3718  Intrinsic::ID IID;
3719  if (VecWidth == 128 && !High)
3720  IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3721  else if (VecWidth == 256 && !High)
3722  IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3723  else if (VecWidth == 512 && !High)
3724  IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3725  else if (VecWidth == 128 && High)
3726  IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3727  else if (VecWidth == 256 && High)
3728  IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3729  else if (VecWidth == 512 && High)
3730  IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3731  else
3732  llvm_unreachable("Unexpected intrinsic");
3733 
3734  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3735  CI->getArgOperand(2) };
3736  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3737  Args);
3738  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3739  : CI->getArgOperand(0);
3740  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3741  } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3742  Name.startswith("avx512.mask.vpermt2var.") ||
3743  Name.startswith("avx512.maskz.vpermt2var."))) {
3744  bool ZeroMask = Name[11] == 'z';
3745  bool IndexForm = Name[17] == 'i';
3746  Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3747  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3748  Name.startswith("avx512.maskz.vpdpbusd.") ||
3749  Name.startswith("avx512.mask.vpdpbusds.") ||
3750  Name.startswith("avx512.maskz.vpdpbusds."))) {
3751  bool ZeroMask = Name[11] == 'z';
3752  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3753  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3754  Intrinsic::ID IID;
3755  if (VecWidth == 128 && !IsSaturating)
3756  IID = Intrinsic::x86_avx512_vpdpbusd_128;
3757  else if (VecWidth == 256 && !IsSaturating)
3758  IID = Intrinsic::x86_avx512_vpdpbusd_256;
3759  else if (VecWidth == 512 && !IsSaturating)
3760  IID = Intrinsic::x86_avx512_vpdpbusd_512;
3761  else if (VecWidth == 128 && IsSaturating)
3762  IID = Intrinsic::x86_avx512_vpdpbusds_128;
3763  else if (VecWidth == 256 && IsSaturating)
3764  IID = Intrinsic::x86_avx512_vpdpbusds_256;
3765  else if (VecWidth == 512 && IsSaturating)
3766  IID = Intrinsic::x86_avx512_vpdpbusds_512;
3767  else
3768  llvm_unreachable("Unexpected intrinsic");
3769 
3770  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3771  CI->getArgOperand(2) };
3772  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3773  Args);
3774  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3775  : CI->getArgOperand(0);
3776  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3777  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3778  Name.startswith("avx512.maskz.vpdpwssd.") ||
3779  Name.startswith("avx512.mask.vpdpwssds.") ||
3780  Name.startswith("avx512.maskz.vpdpwssds."))) {
3781  bool ZeroMask = Name[11] == 'z';
3782  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3783  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3784  Intrinsic::ID IID;
3785  if (VecWidth == 128 && !IsSaturating)
3786  IID = Intrinsic::x86_avx512_vpdpwssd_128;
3787  else if (VecWidth == 256 && !IsSaturating)
3788  IID = Intrinsic::x86_avx512_vpdpwssd_256;
3789  else if (VecWidth == 512 && !IsSaturating)
3790  IID = Intrinsic::x86_avx512_vpdpwssd_512;
3791  else if (VecWidth == 128 && IsSaturating)
3792  IID = Intrinsic::x86_avx512_vpdpwssds_128;
3793  else if (VecWidth == 256 && IsSaturating)
3794  IID = Intrinsic::x86_avx512_vpdpwssds_256;
3795  else if (VecWidth == 512 && IsSaturating)
3796  IID = Intrinsic::x86_avx512_vpdpwssds_512;
3797  else
3798  llvm_unreachable("Unexpected intrinsic");
3799 
3800  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3801  CI->getArgOperand(2) };
3802  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3803  Args);
3804  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3805  : CI->getArgOperand(0);
3806  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3807  } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3808  Name == "addcarry.u32" || Name == "addcarry.u64" ||
3809  Name == "subborrow.u32" || Name == "subborrow.u64")) {
3810  Intrinsic::ID IID;
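  // All of these names end in "u32" or "u64", so the first character picks
  // add vs. subtract and the last character picks the width.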
3811  if (Name[0] == 'a' && Name.back() == '2')
3812  IID = Intrinsic::x86_addcarry_32;
3813  else if (Name[0] == 'a' && Name.back() == '4')
3814  IID = Intrinsic::x86_addcarry_64;
3815  else if (Name[0] == 's' && Name.back() == '2')
3816  IID = Intrinsic::x86_subborrow_32;
3817  else if (Name[0] == 's' && Name.back() == '4')
3818  IID = Intrinsic::x86_subborrow_64;
3819  else
3820  llvm_unreachable("Unexpected intrinsic");
3821 
3822  // Make a call with 3 operands.
3823  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3824  CI->getArgOperand(2)};
3825  Value *NewCall = Builder.CreateCall(
3826  Intrinsic::getDeclaration(CI->getModule(), IID),
3827  Args);
3828 
3829  // Extract the second result and store it.
3830  Value *Data = Builder.CreateExtractValue(NewCall, 1);
3831  // Cast the pointer to the right type.
3832  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3833  llvm::PointerType::getUnqual(Data->getType()));
3834  Builder.CreateAlignedStore(Data, Ptr, Align(1));
3835  // Replace the original call result with the first result of the new call.
3836  Value *CF = Builder.CreateExtractValue(NewCall, 0);
3837 
3838  CI->replaceAllUsesWith(CF);
3839  Rep = nullptr;
3840  } else if (IsX86 && Name.startswith("avx512.mask.") &&
3841  upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3842  // Rep will be updated by the call in the condition.
3843  } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3844  Value *Arg = CI->getArgOperand(0);
3845  Value *Neg = Builder.CreateNeg(Arg, "neg");
3846  Value *Cmp = Builder.CreateICmpSGE(
3847  Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3848  Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3849  } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3850  Name.startswith("atomic.load.add.f64.p"))) {
3851  Value *Ptr = CI->getArgOperand(0);
3852  Value *Val = CI->getArgOperand(1);
3853  Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
3854  AtomicOrdering::SequentiallyConsistent);
3855  } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3856  Name == "max.ui" || Name == "max.ull")) {
3857  Value *Arg0 = CI->getArgOperand(0);
3858  Value *Arg1 = CI->getArgOperand(1);
3859  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3860  ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3861  : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3862  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3863  } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3864  Name == "min.ui" || Name == "min.ull")) {
3865  Value *Arg0 = CI->getArgOperand(0);
3866  Value *Arg1 = CI->getArgOperand(1);
3867  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3868  ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3869  : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3870  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3871  } else if (IsNVVM && Name == "clz.ll") {
3872  // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3873  Value *Arg = CI->getArgOperand(0);
3874  Value *Ctlz = Builder.CreateCall(
3875  Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3876  {Arg->getType()}),
3877  {Arg, Builder.getFalse()}, "ctlz");
3878  Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3879  } else if (IsNVVM && Name == "popc.ll") {
3880  // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
3881  // i64.
3882  Value *Arg = CI->getArgOperand(0);
3883  Value *Popc = Builder.CreateCall(
3884  Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3885  {Arg->getType()}),
3886  Arg, "ctpop");
3887  Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3888  } else if (IsNVVM && Name == "h2f") {
3889  Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3890  F->getParent(), Intrinsic::convert_from_fp16,
3891  {Builder.getFloatTy()}),
3892  CI->getArgOperand(0), "h2f");
3893  } else if (IsARM) {
3894  Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
3895  } else {
3896  llvm_unreachable("Unknown function for CallBase upgrade.");
3897  }
3898 
3899  if (Rep)
3900  CI->replaceAllUsesWith(Rep);
3901  CI->eraseFromParent();
3902  return;
3903  }
3904 
3905  const auto &DefaultCase = [&]() -> void {
3906  if (CI->getFunctionType() == NewFn->getFunctionType()) {
3907  // Handle generic mangling change.
3908  assert(
3909  (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3910  "Unknown function for CallBase upgrade and isn't just a name change");
3911  CI->setCalledFunction(NewFn);
3912  return;
3913  }
3914 
3915  // This must be an upgrade from a named to a literal struct.
3916  if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
3917  assert(OldST != NewFn->getReturnType() &&
3918  "Return type must have changed");
3919  assert(OldST->getNumElements() ==
3920  cast<StructType>(NewFn->getReturnType())->getNumElements() &&
3921  "Must have same number of elements");
3922 
3924  Value *NewCI = Builder.CreateCall(NewFn, Args);
3925  Value *Res = PoisonValue::get(OldST);
3926  for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
3927  Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
3928  Res = Builder.CreateInsertValue(Res, Elem, Idx);
3929  }
3930  CI->replaceAllUsesWith(Res);
3931  CI->eraseFromParent();
3932  return;
3933  }
3934 
3935  // We're probably about to produce something invalid. Let the verifier catch
3936  // it instead of dying here.
3937  CI->setCalledOperand(
3938  ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
3939  return;
3940  };
3941  CallInst *NewCall = nullptr;
3942  switch (NewFn->getIntrinsicID()) {
3943  default: {
3944  DefaultCase();
3945  return;
3946  }
3947  case Intrinsic::arm_neon_vst1:
3948  case Intrinsic::arm_neon_vst2:
3949  case Intrinsic::arm_neon_vst3:
3950  case Intrinsic::arm_neon_vst4:
3951  case Intrinsic::arm_neon_vst2lane:
3952  case Intrinsic::arm_neon_vst3lane:
3953  case Intrinsic::arm_neon_vst4lane: {
3954  SmallVector<Value *, 4> Args(CI->args());
3955  NewCall = Builder.CreateCall(NewFn, Args);
3956  break;
3957  }
3958  case Intrinsic::aarch64_sve_ld3_sret:
3959  case Intrinsic::aarch64_sve_ld4_sret:
3960  case Intrinsic::aarch64_sve_ld2_sret: {
3961  StringRef Name = F->getName();
3962  Name = Name.substr(5);
3963  unsigned N = StringSwitch<unsigned>(Name)
3964  .StartsWith("aarch64.sve.ld2", 2)
3965  .StartsWith("aarch64.sve.ld3", 3)
3966  .StartsWith("aarch64.sve.ld4", 4)
3967  .Default(0);
3968  ScalableVectorType *RetTy =
3969  dyn_cast<ScalableVectorType>(F->getReturnType());
3970  unsigned MinElts = RetTy->getMinNumElements() / N;
3971  SmallVector<Value *, 2> Args(CI->args());
3972  Value *NewLdCall = Builder.CreateCall(NewFn, Args);
3973  Value *Ret = llvm::PoisonValue::get(RetTy);
3974  for (unsigned I = 0; I < N; I++) {
3975  Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
3976  Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
3977  Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
3978  }
3979  NewCall = dyn_cast<CallInst>(Ret);
3980  break;
3981  }
3982 
3983  case Intrinsic::vector_extract: {
3984  StringRef Name = F->getName();
3985  Name = Name.substr(5); // Strip llvm
3986  if (!Name.startswith("aarch64.sve.tuple.get")) {
3987  DefaultCase();
3988  return;
3989  }
3990  ScalableVectorType *RetTy =
3991  dyn_cast<ScalableVectorType>(F->getReturnType());
3992  unsigned MinElts = RetTy->getMinNumElements();
3993  unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3994  Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
3995  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
3996  break;
3997  }
3998 
3999  case Intrinsic::vector_insert: {
4000  StringRef Name = F->getName();
4001  Name = Name.substr(5);
4002  if (!Name.startswith("aarch64.sve.tuple")) {
4003  DefaultCase();
4004  return;
4005  }
4006  if (Name.startswith("aarch64.sve.tuple.set")) {
4007  unsigned I = dyn_cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4008  ScalableVectorType *Ty =
4009  dyn_cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4010  Value *NewIdx =
4011  ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4012  NewCall = Builder.CreateCall(
4013  NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4014  break;
4015  }
4016  if (Name.startswith("aarch64.sve.tuple.create")) {
4017  unsigned N = StringSwitch<unsigned>(Name)
4018  .StartsWith("aarch64.sve.tuple.create2", 2)
4019  .StartsWith("aarch64.sve.tuple.create3", 3)
4020  .StartsWith("aarch64.sve.tuple.create4", 4)
4021  .Default(0);
4022  assert(N > 1 && "Create is expected to be between 2-4");
4023  ScalableVectorType *RetTy =
4024  dyn_cast<ScalableVectorType>(F->getReturnType());
4025  Value *Ret = llvm::PoisonValue::get(RetTy);
4026  unsigned MinElts = RetTy->getMinNumElements() / N;
4027  for (unsigned I = 0; I < N; I++) {
4028  Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4029  Value *V = CI->getArgOperand(I);
4030  Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4031  }
4032  NewCall = dyn_cast<CallInst>(Ret);
4033  }
4034  break;
4035  }
4036 
4037  case Intrinsic::arm_neon_bfdot:
4038  case Intrinsic::arm_neon_bfmmla:
4039  case Intrinsic::arm_neon_bfmlalb:
4040  case Intrinsic::arm_neon_bfmlalt:
4041  case Intrinsic::aarch64_neon_bfdot:
4042  case Intrinsic::aarch64_neon_bfmmla:
4043  case Intrinsic::aarch64_neon_bfmlalb:
4044  case Intrinsic::aarch64_neon_bfmlalt: {
4045  SmallVector<Value *, 3> Args;
4046  assert(CI->arg_size() == 3 &&
4047  "Mismatch between function args and call args");
4048  size_t OperandWidth =
4049  CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4050  assert((OperandWidth == 64 || OperandWidth == 128) &&
4051  "Unexpected operand width");
4052  Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4053  auto Iter = CI->args().begin();
4054  Args.push_back(*Iter++);
4055  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4056  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4057  NewCall = Builder.CreateCall(NewFn, Args);
4058  break;
4059  }
4060 
4061  case Intrinsic::bitreverse:
4062  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4063  break;
4064 
4065  case Intrinsic::ctlz:
4066  case Intrinsic::cttz:
4067  assert(CI->arg_size() == 1 &&
4068  "Mismatch between function args and call args");
4069  NewCall =
4070  Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4071  break;
4072 
4073  case Intrinsic::objectsize: {
4074  Value *NullIsUnknownSize =
4075  CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4076  Value *Dynamic =
4077  CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4078  NewCall = Builder.CreateCall(
4079  NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4080  break;
4081  }
4082 
4083  case Intrinsic::ctpop:
4084  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4085  break;
4086 
4087  case Intrinsic::convert_from_fp16:
4088  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4089  break;
4090 
4091  case Intrinsic::dbg_value:
4092  // Upgrade from the old version that had an extra offset argument.
4093  assert(CI->arg_size() == 4);
4094  // Drop nonzero offsets instead of attempting to upgrade them.
4095  if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4096  if (Offset->isZeroValue()) {
4097  NewCall = Builder.CreateCall(
4098  NewFn,
4099  {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4100  break;
4101  }
4102  CI->eraseFromParent();
4103  return;
4104 
4105  case Intrinsic::ptr_annotation:
4106  // Upgrade from versions that lacked the annotation attribute argument.
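  // Illustrative IR (operand types assumed): a four-operand call such as
  //   call i8* @llvm.ptr.annotation.p0i8(i8* %p, i8* %ann, i8* %file, i32 %line)
  // gains a trailing null i8* operand for the annotation attribute pointer.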
4107  if (CI->arg_size() != 4) {
4108  DefaultCase();
4109  return;
4110  }
4111 
4112  // Create a new call with an added null annotation attribute argument.
4113  NewCall = Builder.CreateCall(
4114  NewFn,
4115  {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4116  CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
4117  NewCall->takeName(CI);
4118  CI->replaceAllUsesWith(NewCall);
4119  CI->eraseFromParent();
4120  return;
4121 
4122  case Intrinsic::var_annotation:
4123  // Upgrade from versions that lacked the annotation attribute argument.
4124  assert(CI->arg_size() == 4 &&
4125  "Before LLVM 12.0 this intrinsic took four arguments");
4126  // Create a new call with an added null annotation attribute argument.
4127  NewCall = Builder.CreateCall(
4128  NewFn,
4129  {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4130  CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
4131  CI->eraseFromParent();
4132  return;
4133 
4134  case Intrinsic::x86_xop_vfrcz_ss:
4135  case Intrinsic::x86_xop_vfrcz_sd:
4136  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4137  break;
4138 
4139  case Intrinsic::x86_xop_vpermil2pd:
4140  case Intrinsic::x86_xop_vpermil2ps:
4141  case Intrinsic::x86_xop_vpermil2pd_256:
4142  case Intrinsic::x86_xop_vpermil2ps_256: {
4143  SmallVector<Value *, 4> Args(CI->args());
4144  VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4145  VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4146  Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4147  NewCall = Builder.CreateCall(NewFn, Args);
4148  break;
4149  }
4150 
4151  case Intrinsic::x86_sse41_ptestc:
4152  case Intrinsic::x86_sse41_ptestz:
4153  case Intrinsic::x86_sse41_ptestnzc: {
4154  // The arguments for these intrinsics used to be v4f32, and changed
4155  // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4156  // So, the only thing required is a bitcast for both arguments.
4157  // First, check the arguments have the old type.
4158  Value *Arg0 = CI->getArgOperand(0);
4159  if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4160  return;
4161 
4162  // Old intrinsic, add bitcasts
4163  Value *Arg1 = CI->getArgOperand(1);
4164 
4165  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4166 
4167  Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4168  Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4169 
4170  NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4171  break;
4172  }
4173 
4174  case Intrinsic::x86_rdtscp: {
4175  // This used to take 1 argument. If we have no arguments, it is already
4176  // upgraded.
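  // Illustrative IR: the old form
  //   %tsc = call i64 @llvm.x86.rdtscp(i8* %aux)
  // becomes a call to the zero-argument intrinsic returning { i64, i32 };
  // the i32 element is stored through the old pointer operand below.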
4177  if (CI->getNumOperands() == 0)
4178  return;
4179 
4180  NewCall = Builder.CreateCall(NewFn);
4181  // Extract the second result and store it.
4182  Value *Data = Builder.CreateExtractValue(NewCall, 1);
4183  // Cast the pointer to the right type.
4184  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
4185  llvm::PointerType::getUnqual(Data->getType()));
4186  Builder.CreateAlignedStore(Data, Ptr, Align(1));
4187  // Replace the original call result with the first result of the new call.
4188  Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4189 
4190  NewCall->takeName(CI);
4191  CI->replaceAllUsesWith(TSC);
4192  CI->eraseFromParent();
4193  return;
4194  }
4195 
4196  case Intrinsic::x86_sse41_insertps:
4197  case Intrinsic::x86_sse41_dppd:
4198  case Intrinsic::x86_sse41_dpps:
4199  case Intrinsic::x86_sse41_mpsadbw:
4200  case Intrinsic::x86_avx_dp_ps_256:
4201  case Intrinsic::x86_avx2_mpsadbw: {
4202  // Need to truncate the last argument from i32 to i8 -- this argument models
4203  // an inherently 8-bit immediate operand to these x86 instructions.
4204  SmallVector<Value *, 4> Args(CI->args());
4205 
4206  // Replace the last argument with a trunc.
4207  Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4208  NewCall = Builder.CreateCall(NewFn, Args);
4209  break;
4210  }
4211 
4212  case Intrinsic::x86_avx512_mask_cmp_pd_128:
4213  case Intrinsic::x86_avx512_mask_cmp_pd_256:
4214  case Intrinsic::x86_avx512_mask_cmp_pd_512:
4215  case Intrinsic::x86_avx512_mask_cmp_ps_128:
4216  case Intrinsic::x86_avx512_mask_cmp_ps_256:
4217  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
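  // These intrinsics used to return a scalar integer mask; the replacements
  // return a vXi1 mask, so the mask argument is converted to vXi1 here and
  // the vXi1 result is widened back to the original scalar type afterwards.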
4218  SmallVector<Value *, 4> Args(CI->args());
4219  unsigned NumElts =
4220  cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4221  Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4222 
4223  NewCall = Builder.CreateCall(NewFn, Args);
4224  Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4225 
4226  NewCall->takeName(CI);
4227  CI->replaceAllUsesWith(Res);
4228  CI->eraseFromParent();
4229  return;
4230  }
4231 
4232  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4233  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4234  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4235  case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4236  case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4237  case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4238  SmallVector<Value *, 4> Args(CI->args());
4239  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4240  if (NewFn->getIntrinsicID() ==
4241  Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4242  Args[1] = Builder.CreateBitCast(
4243  Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4244 
4245  NewCall = Builder.CreateCall(NewFn, Args);
4246  Value *Res = Builder.CreateBitCast(
4247  NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4248 
4249  NewCall->takeName(CI);
4250  CI->replaceAllUsesWith(Res);
4251  CI->eraseFromParent();
4252  return;
4253  }
4254  case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4255  case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4256  case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
4257  SmallVector<Value *, 4> Args(CI->args());
4258  unsigned NumElts =
4259  cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4260  Args[1] = Builder.CreateBitCast(
4261  Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4262  Args[2] = Builder.CreateBitCast(
4263  Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4264 
4265  NewCall = Builder.CreateCall(NewFn, Args);
4266  break;
4267  }
4268 
4269  case Intrinsic::thread_pointer: {
4270  NewCall = Builder.CreateCall(NewFn, {});
4271  break;
4272  }
4273 
4274  case Intrinsic::invariant_start:
4275  case Intrinsic::invariant_end: {
4276  SmallVector<Value *, 4> Args(CI->args());
4277  NewCall = Builder.CreateCall(NewFn, Args);
4278  break;
4279  }
4280  case Intrinsic::masked_load:
4281  case Intrinsic::masked_store:
4282  case Intrinsic::masked_gather:
4283  case Intrinsic::masked_scatter: {
4284  SmallVector<Value *, 4> Args(CI->args());
4285  NewCall = Builder.CreateCall(NewFn, Args);
4286  NewCall->copyMetadata(*CI);
4287  break;
4288  }
4289 
4290  case Intrinsic::memcpy:
4291  case Intrinsic::memmove:
4292  case Intrinsic::memset: {
4293  // We have to make sure that the call signature is what we're expecting.
4294  // We only want to change the old signatures by removing the alignment arg:
4295  // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4296  // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4297  // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4298  // -> @llvm.memset...(i8*, i8, i[32|64], i1)
4299  // Note: i8*'s in the above can be any pointer type
4300  if (CI->arg_size() != 5) {
4301  DefaultCase();
4302  return;
4303  }
4304  // Remove alignment argument (3), and add alignment attributes to the
4305  // dest/src pointers.
4306  Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4307  CI->getArgOperand(2), CI->getArgOperand(4)};
4308  NewCall = Builder.CreateCall(NewFn, Args);
4309  AttributeList OldAttrs = CI->getAttributes();
4310  AttributeList NewAttrs = AttributeList::get(
4311  C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4312  {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4313  OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4314  NewCall->setAttributes(NewAttrs);
4315  auto *MemCI = cast<MemIntrinsic>(NewCall);
4316  // All mem intrinsics support dest alignment.
4317  const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4318  MemCI->setDestAlignment(Align->getMaybeAlignValue());
4319  // Memcpy/Memmove also support source alignment.
4320  if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4321  MTI->setSourceAlignment(Align->getMaybeAlignValue());
4322  break;
4323  }
4324  }
4325  assert(NewCall && "Should have either set this variable or returned through "
4326  "the default case");
4327  NewCall->takeName(CI);
4328  CI->replaceAllUsesWith(NewCall);
4329  CI->eraseFromParent();
4330 }
4331 
4332 void llvm::UpgradeCallsToIntrinsic(Function *F) {
4333  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4334 
4335  // Check if this function should be upgraded and get the replacement function
4336  // if there is one.
4337  Function *NewFn;
4338  if (UpgradeIntrinsicFunction(F, NewFn)) {
4339  // Replace all users of the old function with the new function or new
4340  // instructions. This is not a range loop because the call is deleted.
4341  for (User *U : make_early_inc_range(F->users()))
4342  if (CallBase *CB = dyn_cast<CallBase>(U))
4343  UpgradeIntrinsicCall(CB, NewFn);
4344 
4345  // Remove old function, no longer used, from the module.
4346  F->eraseFromParent();
4347  }
4348 }
4349 
4350 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4351  // Check if the tag uses struct-path aware TBAA format.
4352  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
4353  return &MD;
4354 
4355  auto &Context = MD.getContext();
4356  if (MD.getNumOperands() == 3) {
4357  Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4358  MDNode *ScalarType = MDNode::get(Context, Elts);
4359  // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4360  Metadata *Elts2[] = {ScalarType, ScalarType,
4361  ConstantAsMetadata::get(
4362  Constant::getNullValue(Type::getInt64Ty(Context))),
4363  MD.getOperand(2)};
4364  return MDNode::get(Context, Elts2);
4365  }
4366  // Create a MDNode <MD, MD, offset 0>
4367  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4368  Type::getInt64Ty(Context)))};
4369  return MDNode::get(Context, Elts);
4370 }
4371 
4372 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4373  Instruction *&Temp) {
4374  if (Opc != Instruction::BitCast)
4375  return nullptr;
4376 
4377  Temp = nullptr;
4378  Type *SrcTy = V->getType();
4379  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4380  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4381  LLVMContext &Context = V->getContext();
4382 
4383  // We have no information about target data layout, so we assume that
4384  // the maximum pointer size is 64bit.
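  // E.g. (illustrative) an address-space-changing cast such as
  //   bitcast i8 addrspace(1)* %p to i8*
  // is expanded into a ptrtoint to i64 followed by an inttoptr.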
4385  Type *MidTy = Type::getInt64Ty(Context);
4386  Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4387 
4388  return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4389  }
4390 
4391  return nullptr;
4392 }
4393 
4394 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4395  if (Opc != Instruction::BitCast)
4396  return nullptr;
4397 
4398  Type *SrcTy = C->getType();
4399  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4400  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4401  LLVMContext &Context = C->getContext();
4402 
4403  // We have no information about target data layout, so we assume that
4404  // the maximum pointer size is 64bit.
4405  Type *MidTy = Type::getInt64Ty(Context);
4406 
4407  return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4408  DestTy);
4409  }
4410 
4411  return nullptr;
4412 }
4413 
4414 /// Check the debug info version number; if it is out-dated, drop the debug
4415 /// info. Return true if the module is modified.
4416 bool llvm::UpgradeDebugInfo(Module &M) {
4417  unsigned Version = getDebugMetadataVersionFromModule(M);
4418  if (Version == DEBUG_METADATA_VERSION) {
4419  bool BrokenDebugInfo = false;
4420  if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4421  report_fatal_error("Broken module found, compilation aborted!");
4422  if (!BrokenDebugInfo)
4423  // Everything is ok.
4424  return false;
4425  else {
4426  // Diagnose malformed debug info.
4427  DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4428  M.getContext().diagnose(Diag);
4429  }
4430  }
4431  bool Modified = StripDebugInfo(M);
4432  if (Modified && Version != DEBUG_METADATA_VERSION) {
4433  // Diagnose a version mismatch.
4434  DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4435  M.getContext().diagnose(DiagVersion);
4436  }
4437  return Modified;
4438 }
4439 
4440 /// This checks for the objc retain/release marker, which should be upgraded.
4441 /// It returns true if the module is modified.
4442 static bool UpgradeRetainReleaseMarker(Module &M) {
4443  bool Changed = false;
4444  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4445  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4446  if (ModRetainReleaseMarker) {
4447  MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4448  if (Op) {
4449  MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4450  if (ID) {
4451  SmallVector<StringRef, 4> ValueComp;
4452  ID->getString().split(ValueComp, "#");
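  // e.g. a marker value of the form "instr#comment" (assumed shape) is
  // re-emitted as the module-flag string "instr;comment".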
4453  if (ValueComp.size() == 2) {
4454  std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4455  ID = MDString::get(M.getContext(), NewValue);
4456  }
4457  M.addModuleFlag(Module::Error, MarkerKey, ID);
4458  M.eraseNamedMetadata(ModRetainReleaseMarker);
4459  Changed = true;
4460  }
4461  }
4462  }
4463  return Changed;
4464 }
4465 
4466 void llvm::UpgradeARCRuntime(Module &M) {
4467  // This lambda converts normal function calls to ARC runtime functions to
4468  // intrinsic calls.
4469  auto UpgradeToIntrinsic = [&](const char *OldFunc,
4470  llvm::Intrinsic::ID IntrinsicFunc) {
4471  Function *Fn = M.getFunction(OldFunc);
4472 
4473  if (!Fn)
4474  return;
4475 
4476  Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4477 
4478  for (User *U : make_early_inc_range(Fn->users())) {
4479  CallInst *CI = dyn_cast<CallInst>(U);
4480  if (!CI || CI->getCalledFunction() != Fn)
4481  continue;
4482 
4483  IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4484  FunctionType *NewFuncTy = NewFn->getFunctionType();
4485  SmallVector<Value *, 2> Args;
4486 
4487  // Don't upgrade the intrinsic if it's not valid to bitcast the return
4488  // value to the return type of the old function.
4489  if (NewFuncTy->getReturnType() != CI->getType() &&
4490  !CastInst::castIsValid(Instruction::BitCast, CI,
4491  NewFuncTy->getReturnType()))
4492  continue;
4493 
4494  bool InvalidCast = false;
4495 
4496  for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
4497  Value *Arg = CI->getArgOperand(I);
4498 
4499  // Bitcast argument to the parameter type of the new function if it's
4500  // not a variadic argument.
4501  if (I < NewFuncTy->getNumParams()) {
4502  // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4503  // to the parameter type of the new function.
4504  if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4505  NewFuncTy->getParamType(I))) {
4506  InvalidCast = true;
4507  break;
4508  }
4509  Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4510  }
4511  Args.push_back(Arg);
4512  }
4513 
4514  if (InvalidCast)
4515  continue;
4516 
4517  // Create a call instruction that calls the new function.
4518  CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4519  NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4520  NewCall->takeName(CI);
4521 
4522  // Bitcast the return value back to the type of the old call.
4523  Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4524 
4525  if (!CI->use_empty())
4526  CI->replaceAllUsesWith(NewRetVal);
4527  CI->eraseFromParent();
4528  }
4529 
4530  if (Fn->use_empty())
4531  Fn->eraseFromParent();
4532  };
4533 
4534  // Unconditionally convert a call to "clang.arc.use" to a call to
4535  // "llvm.objc.clang.arc.use".
4536  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
4537 
4538  // Upgrade the retain/release marker. If there is no need to upgrade the
4539  // marker, the module is either already new enough to contain the new
4540  // intrinsics or it is not ARC, so there is no need to upgrade the runtime calls.
4541  if (!UpgradeRetainReleaseMarker(M))
4542  return;
4543 
4544  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
4545  {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
4546  {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
4547  {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
4548  {"objc_autoreleaseReturnValue",
4549  llvm::Intrinsic::objc_autoreleaseReturnValue},
4550  {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
4551  {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
4552  {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
4553  {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
4554  {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
4555  {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
4556  {"objc_release", llvm::Intrinsic::objc_release},
4557  {"objc_retain", llvm::Intrinsic::objc_retain},
4558  {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
4559  {"objc_retainAutoreleaseReturnValue",
4560  llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
4561  {"objc_retainAutoreleasedReturnValue",
4562  llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
4563  {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
4564  {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
4565  {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
4566  {"objc_unsafeClaimAutoreleasedReturnValue",
4567  llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
4568  {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
4569  {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
4570  {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
4571  {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
4572  {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
4573  {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
4574  {"objc_arc_annotation_topdown_bbstart",
4575  llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
4576  {"objc_arc_annotation_topdown_bbend",
4577  llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
4578  {"objc_arc_annotation_bottomup_bbstart",
4579  llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
4580  {"objc_arc_annotation_bottomup_bbend",
4581  llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
4582 
4583  for (auto &I : RuntimeFuncs)
4584  UpgradeToIntrinsic(I.first, I.second);
4585 }
4586 
4587 bool llvm::UpgradeModuleFlags(Module &M) {
4588  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4589  if (!ModFlags)
4590  return false;
4591 
4592  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4593  bool HasSwiftVersionFlag = false;
4594  uint8_t SwiftMajorVersion, SwiftMinorVersion;
4595  uint32_t SwiftABIVersion;
4596  auto Int8Ty = Type::getInt8Ty(M.getContext());
4597  auto Int32Ty = Type::getInt32Ty(M.getContext());
4598 
4599  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4600  MDNode *Op = ModFlags->getOperand(I);
4601  if (Op->getNumOperands() != 3)