//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>
using namespace llvm;

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old; replace it with the new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
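
// For illustration (assumed IR shapes): an old-style declaration such as
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is renamed to @llvm.x86.sse41.ptestc.old and a fresh declaration taking
// <2 x i64> operands is created; the call sites themselves are rewritten
// later by UpgradeIntrinsicCall.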

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
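
// For illustration (assumed IR shapes): the old declaration
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// maps down to the current i8-immediate form, with the mask operand
// truncated to i8 at each upgraded call site.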

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
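
// For illustration (assumed IR shapes): avx512.mask.cmp.ps.512 used to return
// the comparison result as an i16 bitmask, while the current intrinsic
// returns <16 x i1>, so a scalar return type identifies a declaration that
// still needs upgrading.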

static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name == "addcarryx.u32" || // Added in 8.0
      Name == "addcarryx.u64" || // Added in 8.0
      Name == "addcarry.u32" || // Added in 8.0
      Name == "addcarry.u64" || // Added in 8.0
      Name == "subborrow.u32" || // Added in 8.0
      Name == "subborrow.u64" || // Added in 8.0
      Name.startswith("sse2.padds.") || // Added in 8.0
      Name.startswith("sse2.psubs.") || // Added in 8.0
      Name.startswith("sse2.paddus.") || // Added in 8.0
      Name.startswith("sse2.psubus.") || // Added in 8.0
      Name.startswith("avx2.padds.") || // Added in 8.0
      Name.startswith("avx2.psubs.") || // Added in 8.0
      Name.startswith("avx2.paddus.") || // Added in 8.0
      Name.startswith("avx2.psubus.") || // Added in 8.0
      Name.startswith("avx512.padds.") || // Added in 8.0
      Name.startswith("avx512.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.padds.") || // Added in 8.0
      Name.startswith("avx512.mask.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.paddus.") || // Added in 8.0
      Name.startswith("avx512.mask.psubus.") || // Added in 8.0
      Name == "ssse3.pabs.b.128" || // Added in 6.0
      Name == "ssse3.pabs.w.128" || // Added in 6.0
      Name == "ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("fma4.vfmadd.s") || // Added in 7.0
      Name.startswith("fma.vfmadd.") || // Added in 7.0
      Name.startswith("fma.vfmsub.") || // Added in 7.0
      Name.startswith("fma.vfmsubadd.") || // Added in 7.0
      Name.startswith("fma.vfnmadd.") || // Added in 7.0
      Name.startswith("fma.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || // Added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name == "sse.sqrt.ss" || // Added in 7.0
      Name == "sse2.sqrt.sd" || // Added in 7.0
      Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
      Name.startswith("avx.sqrt.p") || // Added in 7.0
      Name.startswith("sse2.sqrt.p") || // Added in 7.0
      Name.startswith("sse.sqrt.p") || // Added in 7.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name == "avx512.kand.w" || // Added in 7.0
      Name == "avx512.kandn.w" || // Added in 7.0
      Name == "avx512.knot.w" || // Added in 7.0
      Name == "avx512.kor.w" || // Added in 7.0
      Name == "avx512.kxor.w" || // Added in 7.0
      Name == "avx512.kxnor.w" || // Added in 7.0
      Name == "avx512.kortestc.w" || // Added in 7.0
      Name == "avx512.kortestz.w" || // Added in 7.0
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
      Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
      Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
      Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
      Name == "avx512.cvtusi2sd" || // Added in 7.0
      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
      Name == "sse2.pmulu.dq" || // Added in 7.0
      Name == "sse41.pmuldq" || // Added in 7.0
      Name == "avx2.pmulu.dq" || // Added in 7.0
      Name == "avx2.pmul.dq" || // Added in 7.0
      Name == "avx512.pmulu.dq.512" || // Added in 7.0
      Name == "avx512.pmul.dq.512" || // Added in 7.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.cmp.p") || // Added in 12.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
      Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.vpshld.") || // Added in 8.0
      Name.startswith("avx512.vpshrd.") || // Added in 8.0
      Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
      Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
      Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
      Name.startswith("avx512.mask.conflict.") || // Added in 9.0
      Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
      Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
      Name == "sse.cvtsi2ss" || // Added in 7.0
      Name == "sse.cvtsi642ss" || // Added in 7.0
      Name == "sse2.cvtsi2sd" || // Added in 7.0
      Name == "sse2.cvtsi642sd" || // Added in 7.0
      Name == "sse2.cvtss2sd" || // Added in 7.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtdq2ps" || // Added in 7.0
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvtdq2.ps.256" || // Added in 7.0
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("vcvtph2ps.") || // Added in 11.0
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name == "avx512.mask.store.ss" || // Added in 7.0
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
      Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
      Name.startswith("avx512.mask.expand.b") || // Added in 9.0
      Name.startswith("avx512.mask.expand.w") || // Added in 9.0
      Name.startswith("avx512.mask.expand.d") || // Added in 9.0
      Name.startswith("avx512.mask.expand.q") || // Added in 9.0
      Name.startswith("avx512.mask.expand.p") || // Added in 9.0
      Name.startswith("avx512.mask.compress.b") || // Added in 9.0
      Name.startswith("avx512.mask.compress.w") || // Added in 9.0
      Name.startswith("avx512.mask.compress.d") || // Added in 9.0
      Name.startswith("avx512.mask.compress.q") || // Added in 9.0
      Name.startswith("avx512.mask.compress.p") || // Added in 9.0
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
      Name.startswith("xop.vprot") || // Added in 8.0
      Name.startswith("avx512.prol") || // Added in 8.0
      Name.startswith("avx512.pror") || // Added in 8.0
      Name.startswith("avx512.mask.prorv.") || // Added in 8.0
      Name.startswith("avx512.mask.pror.") || // Added in 8.0
      Name.startswith("avx512.mask.prolv.") || // Added in 8.0
      Name.startswith("avx512.mask.prol.") || // Added in 8.0
      Name.startswith("avx512.ptestm") || // Added in 6.0
      Name.startswith("avx512.ptestnm") || // Added in 6.0
      Name.startswith("avx512.mask.pavg")) // Added in 6.0
    return true;

  return false;
}
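
// When this predicate matches, UpgradeX86IntrinsicFunction reports an upgrade
// with NewFn left null, signalling that the old call has no one-to-one
// replacement declaration and must instead be expanded inline by
// UpgradeIntrinsicCall.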

static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }
  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);
  if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
                                     NewFn);
  if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
                                     NewFn);
  if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
                                     NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.frintn")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      // llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (!Name.contains("lane"))
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
    if (Name.startswith("arm.neon.vqadds.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqaddu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqsubs.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqsubu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.addp")) {
      if (F->arg_size() != 2)
        break; // Invalid IR.
      VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
      if (Ty && Ty->getElementType()->isFloatingPointTy()) {
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::aarch64_neon_faddp, Ty);
        return true;
      }
    }

    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if ((Name.startswith("arm.neon.bfdot.") ||
         Name.startswith("aarch64.neon.bfdot.")) &&
        Name.endswith("i8")) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("arm.neon.bfdot.v2f32.v8i8",
                     "arm.neon.bfdot.v4f32.v16i8",
                     Intrinsic::arm_neon_bfdot)
              .Cases("aarch64.neon.bfdot.v2f32.v8i8",
                     "aarch64.neon.bfdot.v4f32.v16i8",
                     Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (IID == Intrinsic::not_intrinsic)
        break;

      size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
      assert((OperandWidth == 64 || OperandWidth == 128) &&
             "Unexpected operand width");
      LLVMContext &Ctx = F->getParent()->getContext();
      std::array<Type *, 2> Tys {{
        F->getReturnType(),
        FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
      }};
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
      return true;
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if ((Name.startswith("arm.neon.bfm") ||
         Name.startswith("aarch64.neon.bfm")) &&
        Name.endswith(".v4f32.v16i8")) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Case("arm.neon.bfmmla.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmmla)
              .Case("arm.neon.bfmlalb.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmlalb)
              .Case("arm.neon.bfmlalt.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmlalt)
              .Case("aarch64.neon.bfmmla.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmmla)
              .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmlalb)
              .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmlalt)
              .Default(Intrinsic::not_intrinsic);
      if (IID == Intrinsic::not_intrinsic)
        break;

      std::array<Type *, 0> Tys;
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
      return true;
    }

    if (Name == "arm.mve.vctp64" &&
        cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
      // A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
      // function and deal with it below in UpgradeIntrinsicCall.
      rename(F);
      return true;
    }
    // These too are changed to accept a v2i1 instead of the old v4i1.
    if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
        Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
        Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
        Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
        Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
      return true;

    if (Name == "amdgcn.alignbit") {
      // Target specific intrinsic became redundant.
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
                                        {F->getReturnType()});
      return true;
    }

    break;
  }

  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'd': {
    if (Name == "dbg.value" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
      return true;
    }
    break;
  }
  case 'e': {
    if (Name.startswith("experimental.vector.extract.")) {
      rename(F);
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::vector_extract, Tys);
      return true;
    }

    if (Name.startswith("experimental.vector.insert.")) {
      rename(F);
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[1]};
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::vector_insert, Tys);
      return true;
    }

    SmallVector<StringRef, 2> Groups;
    static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
    if (R.match(Name, &Groups)) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Groups[1])
               .Case("add", Intrinsic::vector_reduce_add)
               .Case("mul", Intrinsic::vector_reduce_mul)
               .Case("and", Intrinsic::vector_reduce_and)
               .Case("or", Intrinsic::vector_reduce_or)
               .Case("xor", Intrinsic::vector_reduce_xor)
               .Case("smax", Intrinsic::vector_reduce_smax)
               .Case("smin", Intrinsic::vector_reduce_smin)
               .Case("umax", Intrinsic::vector_reduce_umax)
               .Case("umin", Intrinsic::vector_reduce_umin)
               .Case("fmax", Intrinsic::vector_reduce_fmax)
               .Case("fmin", Intrinsic::vector_reduce_fmin)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        auto Args = F->getFunctionType()->params();
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
        return true;
      }
    }
    static const Regex R2(
        "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
    Groups.clear();
    if (R2.match(Name, &Groups)) {
      Intrinsic::ID ID = Intrinsic::not_intrinsic;
      if (Groups[1] == "fadd")
        ID = Intrinsic::vector_reduce_fadd;
      if (Groups[1] == "fmul")
        ID = Intrinsic::vector_reduce_fmul;
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {Args[1]};
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }
    }
    break;
  }
  case 'i':
  case 'l': {
    bool IsLifetimeStart = Name.startswith("lifetime.start");
    if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::ID ID = IsLifetimeStart ?
        Intrinsic::lifetime_start : Intrinsic::invariant_start;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }

    bool IsLifetimeEnd = Name.startswith("lifetime.end");
    if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::ID ID = IsLifetimeEnd ?
        Intrinsic::lifetime_end : Intrinsic::invariant_end;

      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    if (Name.startswith("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group.
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
          Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }

    break;
  }
  case 'm': {
    if (Name.startswith("masked.load.")) {
      Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::masked_load, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_load,
                                          Tys);
        return true;
      }
    }
    if (Name.startswith("masked.store.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = { Args[0], Args[1] };
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::masked_store, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_store,
                                          Tys);
        return true;
      }
    }
    // Renaming gather/scatter intrinsics with no address space overloading
    // to the new overload which includes an address space.
    if (Name.startswith("masked.gather.")) {
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::masked_gather, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_gather, Tys);
        return true;
      }
    }
    if (Name.startswith("masked.scatter.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[1]};
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::masked_scatter, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_scatter, Tys);
        return true;
      }
    }
    // Updating the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embedding the alignment as an attribute of
    // the pointer args.
    if (Name.startswith("memcpy.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len.
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memmove.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len.
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len.
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
        FT->getParamType(0), // Dest
        FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
                                        ParamTypes);
      return true;
    }
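
    // For illustration (assumed IR shapes), the memory-intrinsic upgrades
    // above turn the old five-argument form
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n,
    //                                        i32 4, i1 false)
    // into the four-argument form, re-expressing the alignment as align
    // attributes on the pointer arguments at each call site.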
    break;
  }
  case 'n': {
    if (Name.startswith("nvvm.")) {
      Name = Name.substr(5);

      // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
      Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
                              .Cases("brev32", "brev64", Intrinsic::bitreverse)
                              .Case("clz.i", Intrinsic::ctlz)
                              .Case("popc.i", Intrinsic::ctpop)
                              .Default(Intrinsic::not_intrinsic);
      if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                          {F->getReturnType()});
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = StringSwitch<bool>(Name)
                        .Cases("abs.i", "abs.ll", true)
                        .Cases("clz.ll", "popc.ll", "h2f", true)
                        .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
                        .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
                        .StartsWith("atomic.load.add.f32.p", true)
                        .StartsWith("atomic.load.add.f64.p", true)
                        .Default(false);
      if (Expand) {
        NewFn = nullptr;
        return true;
      }
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() !=
              Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
                                          Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name == "prefetch") {
      // Handle address space overloading.
      Type *Tys[] = {F->arg_begin()->getType()};
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::prefetch, Tys, F->getParent())) {
        rename(F);
        NewFn =
            Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
        return true;
      }
    } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::ptr_annotation,
                                        F->arg_begin()->getType());
      return true;
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::var_annotation);
      return true;
    }
    break;
  }

  case 'x':
    if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }

  auto *ST = dyn_cast<StructType>(F->getReturnType());
  if (ST && (!ST->isLiteral() || ST->isPacked())) {
    // Replace return type with literal non-packed struct. Only do this for
    // intrinsics declared to return a struct, not for intrinsics with
    // overloaded return type, in which case the exact struct type will be
    // mangled into the name.
    SmallVector<Intrinsic::IITDescriptor, 8> Desc;
    Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
    if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
      auto *FT = F->getFunctionType();
      auto *NewST = StructType::get(ST->getContext(), ST->elements());
      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
      std::string Name = F->getName().str();
      rename(F);
      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
                               Name, F->getParent());

      // The new function may also need remangling.
      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F))
        NewFn = *Result;
      return true;
    }
  }

  // Remangle our intrinsic since we upgrade the mangling.
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = *Result;
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getInt8PtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
                                      Ctor->getAggregateElement(1),
                                      Constant::getNullValue(IRB.getInt8PtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}
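
// For illustration (assumed IR shapes): a two-field table such as
//   @llvm.global_ctors = appending global [1 x { i32, void ()* }] [...]
// is rebuilt with the three-field element type { i32, void ()*, i8* },
// filling in a null i8* as the associated-data field of every entry.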

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
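
// Worked example (for illustration): for a 128-bit vector NumElts is 16, so a
// byte shift of 4 produces the index list <12,13,14,15,16,...,27>; indices
// 12-15 select from the zero vector (shuffle operand 0) and 16-27 select the
// low 12 bytes of Op (operand 1), i.e. a left shift by four bytes.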

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
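
// The mirror image of the PSLLDQ example above: a byte shift of 4 on a
// 128-bit vector yields indices <4,...,15,16,17,18,19>, taking the upper 12
// bytes of Op followed by four zero bytes.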

static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
  llvm::VectorType *MaskTy = FixedVectorType::get(
      Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
  // i8 and we need to extract down to the right number of elements.
  if (NumElts <= 4) {
    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(
        Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
  }

  return Mask;
}
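
// For illustration: a 4-element masked operation still carries its mask as an
// i8, so the i8 is bitcast to <8 x i1> and the low four lanes are extracted
// with a shufflevector, yielding the <4 x i1> that callers select with.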

static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                            Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask,
                       cast<FixedVectorType>(Op0->getType())->getNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
                                  Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
                                      Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
}

// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes, so we need to handle that.
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  return EmitX86Select(Builder, Mask, Align, Passthru);
}
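
// Worked example (for illustration): a 128-bit palignr with ShiftVal == 4
// produces indices <4,...,19>, i.e. bytes 4-15 of Op1 followed by bytes 0-3
// of Op0; a ShiftVal between 17 and 31 shifts in zero bytes instead, and 32
// or more yields an all-zero result.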

static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0), CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                                Args);
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}

static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
                                         Intrinsic::ID IID) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getOperand(0);
  Value *Op1 = CI.getOperand(1);
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});

  if (CI.arg_size() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
1346 
1348  bool IsRotateRight) {
1349  Type *Ty = CI.getType();
1350  Value *Src = CI.getArgOperand(0);
1351  Value *Amt = CI.getArgOperand(1);
1352 
1353  // Amount may be scalar immediate, in which case create a splat vector.
1354  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1355  // we only care about the lowest log2 bits anyway.
1356  if (Amt->getType() != Ty) {
1357  unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1358  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1359  Amt = Builder.CreateVectorSplat(NumElts, Amt);
1360  }
1361 
1362  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1363  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1364  Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1365 
1366  if (CI.arg_size() == 4) { // For masked intrinsics.
1367  Value *VecSrc = CI.getOperand(2);
1368  Value *Mask = CI.getOperand(3);
1369  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1370  }
1371  return Res;
1372 }
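// Example (illustrative; value names are hypothetical): a left rotate by an
// immediate,
//   %r = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x, i32 5)
// splats the amount and lowers to a funnel shift with both data inputs equal:
//   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x,
//            <4 x i32> <i32 5, i32 5, i32 5, i32 5>)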
1373 
1374 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1375  bool IsSigned) {
1376  Type *Ty = CI.getType();
1377  Value *LHS = CI.getArgOperand(0);
1378  Value *RHS = CI.getArgOperand(1);
1379 
1380  CmpInst::Predicate Pred;
1381  switch (Imm) {
1382  case 0x0:
1383  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1384  break;
1385  case 0x1:
1386  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1387  break;
1388  case 0x2:
1389  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1390  break;
1391  case 0x3:
1392  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1393  break;
1394  case 0x4:
1395  Pred = ICmpInst::ICMP_EQ;
1396  break;
1397  case 0x5:
1398  Pred = ICmpInst::ICMP_NE;
1399  break;
1400  case 0x6:
1401  return Constant::getNullValue(Ty); // FALSE
1402  case 0x7:
1403  return Constant::getAllOnesValue(Ty); // TRUE
1404  default:
1405  llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1406  }
1407 
1408  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1409  Value *Ext = Builder.CreateSExt(Cmp, Ty);
1410  return Ext;
1411 }
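// Example (illustrative; value names are hypothetical): the signed
// "less than" form,
//   %r = call <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8> %a, <16 x i8> %b)
// becomes an ordinary compare whose i1 lanes are sign-extended back to the
// element width:
//   %c = icmp slt <16 x i8> %a, %b
//   %r = sext <16 x i1> %c to <16 x i8>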
1412 
1413 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1414  bool IsShiftRight, bool ZeroMask) {
1415  Type *Ty = CI.getType();
1416  Value *Op0 = CI.getArgOperand(0);
1417  Value *Op1 = CI.getArgOperand(1);
1418  Value *Amt = CI.getArgOperand(2);
1419 
1420  if (IsShiftRight)
1421  std::swap(Op0, Op1);
1422 
1423  // The amount may be a scalar immediate, in which case create a splat vector.
1424  // Funnel shift amounts are treated as modulo, and the types are all
1425  // powers of 2, so we only care about the lowest log2 bits anyway.
1426  if (Amt->getType() != Ty) {
1427  unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1428  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1429  Amt = Builder.CreateVectorSplat(NumElts, Amt);
1430  }
1431 
1432  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1433  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1434  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1435 
1436  unsigned NumArgs = CI.arg_size();
1437  if (NumArgs >= 4) { // For masked intrinsics.
1438  Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1439  ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1440  CI.getArgOperand(0);
1441  Value *Mask = CI.getOperand(NumArgs - 1);
1442  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1443  }
1444  return Res;
1445 }
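// Example (illustrative; value names are hypothetical): a concat-and-shift
// such as
//   %r = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %a,
//            <4 x i32> %b, i32 7)
// maps onto a funnel shift over two distinct inputs,
//   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b,
//            <4 x i32> <i32 7, i32 7, i32 7, i32 7>)
// with the operands swapped and llvm.fshr used for the vpshrd variants.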
1446 
1447 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1448  Value *Ptr, Value *Data, Value *Mask,
1449  bool Aligned) {
1450  // Cast the pointer to the right type.
1451  Ptr = Builder.CreateBitCast(Ptr,
1452  llvm::PointerType::getUnqual(Data->getType()));
1453  const Align Alignment =
1454  Aligned
1455  ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
1456  : Align(1);
1457 
1458  // If the mask is all ones, just emit a regular store.
1459  if (const auto *C = dyn_cast<Constant>(Mask))
1460  if (C->isAllOnesValue())
1461  return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1462 
1463  // Convert the mask from an integer type to a vector of i1.
1464  unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1465  Mask = getX86MaskVec(Builder, Mask, NumElts);
1466  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1467 }
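// Note (illustrative sketch; value names are hypothetical): a constant
// all-ones mask degenerates to the plain store above; any other mask is
// widened to <N x i1> by getX86MaskVec and emitted roughly as
//   call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %data,
//            <4 x i32>* %ptr, i32 16, <4 x i1> %mask)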
1468 
1469 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1470  Value *Ptr, Value *Passthru, Value *Mask,
1471  bool Aligned) {
1472  Type *ValTy = Passthru->getType();
1473  // Cast the pointer to the right type.
1474  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1475  const Align Alignment =
1476  Aligned
1477  ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
1478  8)
1479  : Align(1);
1480 
1481  // If the mask is all ones, just emit a regular load.
1482  if (const auto *C = dyn_cast<Constant>(Mask))
1483  if (C->isAllOnesValue())
1484  return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1485 
1486  // Convert the mask from an integer type to a vector of i1.
1487  unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1488  Mask = getX86MaskVec(Builder, Mask, NumElts);
1489  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1490 }
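// Note (illustrative sketch; value names are hypothetical): the load-side
// twin of the store helper above; a non-trivial mask becomes roughly
//   %r = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %ptr,
//            i32 1, <4 x i1> %mask, <4 x i32> %passthru)
// while a constant all-ones mask collapses to an ordinary aligned load.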
1491 
1492 static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1493  Type *Ty = CI.getType();
1494  Value *Op0 = CI.getArgOperand(0);
1495  Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1496  Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1497  if (CI.arg_size() == 3)
1498  Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1499  return Res;
1500 }
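// Example (illustrative; value names are hypothetical):
//   %r = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %x)
// becomes the generic absolute value, where the trailing i1 false states that
// an INT_MIN input is not treated as poison:
//   %r = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %x, i1 false)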
1501 
1502 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1503  Type *Ty = CI.getType();
1504 
1505  // Arguments have a vXi32 type so cast to vXi64.
1506  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1507  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1508 
1509  if (IsSigned) {
1510  // Shift left then arithmetic shift right.
1511  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1512  LHS = Builder.CreateShl(LHS, ShiftAmt);
1513  LHS = Builder.CreateAShr(LHS, ShiftAmt);
1514  RHS = Builder.CreateShl(RHS, ShiftAmt);
1515  RHS = Builder.CreateAShr(RHS, ShiftAmt);
1516  } else {
1517  // Clear the upper bits.
1518  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1519  LHS = Builder.CreateAnd(LHS, Mask);
1520  RHS = Builder.CreateAnd(RHS, Mask);
1521  }
1522 
1523  Value *Res = Builder.CreateMul(LHS, RHS);
1524 
1525  if (CI.arg_size() == 4)
1526  Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1527 
1528  return Res;
1529 }
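// Note (illustrative): pmuludq/pmuldq multiply the even 32-bit lanes into
// 64-bit products, so the helper bitcasts both operands to vXi64, isolates
// the low 32 bits of each element (an 0xffffffff AND when unsigned, a
// shl/ashr pair to sign-extend when signed), and then emits one 64-bit mul
// per element, with the usual trailing select for the masked forms.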
1530 
1531 // Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
1532 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1533  Value *Mask) {
1534  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1535  if (Mask) {
1536  const auto *C = dyn_cast<Constant>(Mask);
1537  if (!C || !C->isAllOnesValue())
1538  Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1539  }
1540 
1541  if (NumElts < 8) {
1542  int Indices[8];
1543  for (unsigned i = 0; i != NumElts; ++i)
1544  Indices[i] = i;
1545  for (unsigned i = NumElts; i != 8; ++i)
1546  Indices[i] = NumElts + i % NumElts;
1547  Vec = Builder.CreateShuffleVector(Vec,
1548  Constant::getNullValue(Vec->getType()),
1549  Indices);
1550  }
1551  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1552 }
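// Example (illustrative): given a <4 x i1> compare result, the shuffle above
// pads lanes 4-7 with zeros drawn from the null-value operand to form an
// <8 x i1>, and the final bitcast yields an i8 whose low 4 bits carry the
// mask; vectors of 8 or more lanes are bitcast directly.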
1553 
1554 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1555  unsigned CC, bool Signed) {
1556  Value *Op0 = CI.getArgOperand(0);
1557  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1558 
1559  Value *Cmp;
1560  if (CC == 3) {
1561  Cmp = Constant::getNullValue(
1562  FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1563  } else if (CC == 7) {
1564  Cmp = Constant::getAllOnesValue(
1565  FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1566  } else {
1567  ICmpInst::Predicate Pred;
1568  switch (CC) {
1569  default: llvm_unreachable("Unknown condition code");
1570  case 0: Pred = ICmpInst::ICMP_EQ; break;
1571  case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1572  case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1573  case 4: Pred = ICmpInst::ICMP_NE; break;
1574  case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1575  case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1576  }
1577  Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1578  }
1579 
1580  Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1581 
1582  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1583 }
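// Example (illustrative; value names are hypothetical): CC == 0 emits
//   %c = icmp eq <4 x i32> %a, %b
// and the result is ANDed with the incoming mask and widened to at least
// 8 bits by ApplyX86MaskOn1BitsVec, so the caller sees the legacy i8/i16
// mask-register representation.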
1584 
1585 // Replace a masked intrinsic with an older unmasked intrinsic.
1586 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
1587  Intrinsic::ID IID) {
1588  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1589  Value *Rep = Builder.CreateCall(Intrin,
1590  { CI.getArgOperand(0), CI.getArgOperand(1) });
1591  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1592 }
1593 
1594 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
1595  Value* A = CI.getArgOperand(0);
1596  Value* B = CI.getArgOperand(1);
1597  Value* Src = CI.getArgOperand(2);
1598  Value* Mask = CI.getArgOperand(3);
1599 
1600  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1601  Value* Cmp = Builder.CreateIsNotNull(AndNode);
1602  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1603  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1604  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1605  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1606 }
1607 
1608 
1609 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
1610  Value* Op = CI.getArgOperand(0);
1611  Type* ReturnOp = CI.getType();
1612  unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1613  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1614  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1615 }
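// Note (illustrative): the scalar mask operand is expanded to <N x i1> by
// getX86MaskVec and then sign-extended, so each result lane is either all
// ones or all zeros, which is exactly the vpmovm2* semantics the old
// intrinsic exposed.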
1616 
1617 // Replace intrinsic with unmasked version and a select.
1618 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1619  CallBase &CI, Value *&Rep) {
1620  Name = Name.substr(12); // Remove avx512.mask.
1621 
1622  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1623  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1624  Intrinsic::ID IID;
1625  if (Name.startswith("max.p")) {
1626  if (VecWidth == 128 && EltWidth == 32)
1627  IID = Intrinsic::x86_sse_max_ps;
1628  else if (VecWidth == 128 && EltWidth == 64)
1629  IID = Intrinsic::x86_sse2_max_pd;
1630  else if (VecWidth == 256 && EltWidth == 32)
1631  IID = Intrinsic::x86_avx_max_ps_256;
1632  else if (VecWidth == 256 && EltWidth == 64)
1633  IID = Intrinsic::x86_avx_max_pd_256;
1634  else
1635  llvm_unreachable("Unexpected intrinsic");
1636  } else if (Name.startswith("min.p")) {
1637  if (VecWidth == 128 && EltWidth == 32)
1638  IID = Intrinsic::x86_sse_min_ps;
1639  else if (VecWidth == 128 && EltWidth == 64)
1640  IID = Intrinsic::x86_sse2_min_pd;
1641  else if (VecWidth == 256 && EltWidth == 32)
1642  IID = Intrinsic::x86_avx_min_ps_256;
1643  else if (VecWidth == 256 && EltWidth == 64)
1644  IID = Intrinsic::x86_avx_min_pd_256;
1645  else
1646  llvm_unreachable("Unexpected intrinsic");
1647  } else if (Name.startswith("pshuf.b.")) {
1648  if (VecWidth == 128)
1649  IID = Intrinsic::x86_ssse3_pshuf_b_128;
1650  else if (VecWidth == 256)
1651  IID = Intrinsic::x86_avx2_pshuf_b;
1652  else if (VecWidth == 512)
1653  IID = Intrinsic::x86_avx512_pshuf_b_512;
1654  else
1655  llvm_unreachable("Unexpected intrinsic");
1656  } else if (Name.startswith("pmul.hr.sw.")) {
1657  if (VecWidth == 128)
1658  IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1659  else if (VecWidth == 256)
1660  IID = Intrinsic::x86_avx2_pmul_hr_sw;
1661  else if (VecWidth == 512)
1662  IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1663  else
1664  llvm_unreachable("Unexpected intrinsic");
1665  } else if (Name.startswith("pmulh.w.")) {
1666  if (VecWidth == 128)
1667  IID = Intrinsic::x86_sse2_pmulh_w;
1668  else if (VecWidth == 256)
1669  IID = Intrinsic::x86_avx2_pmulh_w;
1670  else if (VecWidth == 512)
1671  IID = Intrinsic::x86_avx512_pmulh_w_512;
1672  else
1673  llvm_unreachable("Unexpected intrinsic");
1674  } else if (Name.startswith("pmulhu.w.")) {
1675  if (VecWidth == 128)
1676  IID = Intrinsic::x86_sse2_pmulhu_w;
1677  else if (VecWidth == 256)
1678  IID = Intrinsic::x86_avx2_pmulhu_w;
1679  else if (VecWidth == 512)
1680  IID = Intrinsic::x86_avx512_pmulhu_w_512;
1681  else
1682  llvm_unreachable("Unexpected intrinsic");
1683  } else if (Name.startswith("pmaddw.d.")) {
1684  if (VecWidth == 128)
1685  IID = Intrinsic::x86_sse2_pmadd_wd;
1686  else if (VecWidth == 256)
1687  IID = Intrinsic::x86_avx2_pmadd_wd;
1688  else if (VecWidth == 512)
1689  IID = Intrinsic::x86_avx512_pmaddw_d_512;
1690  else
1691  llvm_unreachable("Unexpected intrinsic");
1692  } else if (Name.startswith("pmaddubs.w.")) {
1693  if (VecWidth == 128)
1694  IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1695  else if (VecWidth == 256)
1696  IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1697  else if (VecWidth == 512)
1698  IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1699  else
1700  llvm_unreachable("Unexpected intrinsic");
1701  } else if (Name.startswith("packsswb.")) {
1702  if (VecWidth == 128)
1703  IID = Intrinsic::x86_sse2_packsswb_128;
1704  else if (VecWidth == 256)
1705  IID = Intrinsic::x86_avx2_packsswb;
1706  else if (VecWidth == 512)
1707  IID = Intrinsic::x86_avx512_packsswb_512;
1708  else
1709  llvm_unreachable("Unexpected intrinsic");
1710  } else if (Name.startswith("packssdw.")) {
1711  if (VecWidth == 128)
1712  IID = Intrinsic::x86_sse2_packssdw_128;
1713  else if (VecWidth == 256)
1714  IID = Intrinsic::x86_avx2_packssdw;
1715  else if (VecWidth == 512)
1716  IID = Intrinsic::x86_avx512_packssdw_512;
1717  else
1718  llvm_unreachable("Unexpected intrinsic");
1719  } else if (Name.startswith("packuswb.")) {
1720  if (VecWidth == 128)
1721  IID = Intrinsic::x86_sse2_packuswb_128;
1722  else if (VecWidth == 256)
1723  IID = Intrinsic::x86_avx2_packuswb;
1724  else if (VecWidth == 512)
1725  IID = Intrinsic::x86_avx512_packuswb_512;
1726  else
1727  llvm_unreachable("Unexpected intrinsic");
1728  } else if (Name.startswith("packusdw.")) {
1729  if (VecWidth == 128)
1730  IID = Intrinsic::x86_sse41_packusdw;
1731  else if (VecWidth == 256)
1732  IID = Intrinsic::x86_avx2_packusdw;
1733  else if (VecWidth == 512)
1734  IID = Intrinsic::x86_avx512_packusdw_512;
1735  else
1736  llvm_unreachable("Unexpected intrinsic");
1737  } else if (Name.startswith("vpermilvar.")) {
1738  if (VecWidth == 128 && EltWidth == 32)
1739  IID = Intrinsic::x86_avx_vpermilvar_ps;
1740  else if (VecWidth == 128 && EltWidth == 64)
1741  IID = Intrinsic::x86_avx_vpermilvar_pd;
1742  else if (VecWidth == 256 && EltWidth == 32)
1743  IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1744  else if (VecWidth == 256 && EltWidth == 64)
1745  IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1746  else if (VecWidth == 512 && EltWidth == 32)
1747  IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1748  else if (VecWidth == 512 && EltWidth == 64)
1749  IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1750  else
1751  llvm_unreachable("Unexpected intrinsic");
1752  } else if (Name == "cvtpd2dq.256") {
1753  IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1754  } else if (Name == "cvtpd2ps.256") {
1755  IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1756  } else if (Name == "cvttpd2dq.256") {
1757  IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1758  } else if (Name == "cvttps2dq.128") {
1759  IID = Intrinsic::x86_sse2_cvttps2dq;
1760  } else if (Name == "cvttps2dq.256") {
1761  IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1762  } else if (Name.startswith("permvar.")) {
1763  bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1764  if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1765  IID = Intrinsic::x86_avx2_permps;
1766  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1767  IID = Intrinsic::x86_avx2_permd;
1768  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1769  IID = Intrinsic::x86_avx512_permvar_df_256;
1770  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1771  IID = Intrinsic::x86_avx512_permvar_di_256;
1772  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1773  IID = Intrinsic::x86_avx512_permvar_sf_512;
1774  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1775  IID = Intrinsic::x86_avx512_permvar_si_512;
1776  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1777  IID = Intrinsic::x86_avx512_permvar_df_512;
1778  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1779  IID = Intrinsic::x86_avx512_permvar_di_512;
1780  else if (VecWidth == 128 && EltWidth == 16)
1781  IID = Intrinsic::x86_avx512_permvar_hi_128;
1782  else if (VecWidth == 256 && EltWidth == 16)
1783  IID = Intrinsic::x86_avx512_permvar_hi_256;
1784  else if (VecWidth == 512 && EltWidth == 16)
1785  IID = Intrinsic::x86_avx512_permvar_hi_512;
1786  else if (VecWidth == 128 && EltWidth == 8)
1787  IID = Intrinsic::x86_avx512_permvar_qi_128;
1788  else if (VecWidth == 256 && EltWidth == 8)
1789  IID = Intrinsic::x86_avx512_permvar_qi_256;
1790  else if (VecWidth == 512 && EltWidth == 8)
1791  IID = Intrinsic::x86_avx512_permvar_qi_512;
1792  else
1793  llvm_unreachable("Unexpected intrinsic");
1794  } else if (Name.startswith("dbpsadbw.")) {
1795  if (VecWidth == 128)
1796  IID = Intrinsic::x86_avx512_dbpsadbw_128;
1797  else if (VecWidth == 256)
1798  IID = Intrinsic::x86_avx512_dbpsadbw_256;
1799  else if (VecWidth == 512)
1800  IID = Intrinsic::x86_avx512_dbpsadbw_512;
1801  else
1802  llvm_unreachable("Unexpected intrinsic");
1803  } else if (Name.startswith("pmultishift.qb.")) {
1804  if (VecWidth == 128)
1805  IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1806  else if (VecWidth == 256)
1807  IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1808  else if (VecWidth == 512)
1809  IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1810  else
1811  llvm_unreachable("Unexpected intrinsic");
1812  } else if (Name.startswith("conflict.")) {
1813  if (Name[9] == 'd' && VecWidth == 128)
1814  IID = Intrinsic::x86_avx512_conflict_d_128;
1815  else if (Name[9] == 'd' && VecWidth == 256)
1816  IID = Intrinsic::x86_avx512_conflict_d_256;
1817  else if (Name[9] == 'd' && VecWidth == 512)
1818  IID = Intrinsic::x86_avx512_conflict_d_512;
1819  else if (Name[9] == 'q' && VecWidth == 128)
1820  IID = Intrinsic::x86_avx512_conflict_q_128;
1821  else if (Name[9] == 'q' && VecWidth == 256)
1822  IID = Intrinsic::x86_avx512_conflict_q_256;
1823  else if (Name[9] == 'q' && VecWidth == 512)
1824  IID = Intrinsic::x86_avx512_conflict_q_512;
1825  else
1826  llvm_unreachable("Unexpected intrinsic");
1827  } else if (Name.startswith("pavg.")) {
1828  if (Name[5] == 'b' && VecWidth == 128)
1829  IID = Intrinsic::x86_sse2_pavg_b;
1830  else if (Name[5] == 'b' && VecWidth == 256)
1831  IID = Intrinsic::x86_avx2_pavg_b;
1832  else if (Name[5] == 'b' && VecWidth == 512)
1833  IID = Intrinsic::x86_avx512_pavg_b_512;
1834  else if (Name[5] == 'w' && VecWidth == 128)
1835  IID = Intrinsic::x86_sse2_pavg_w;
1836  else if (Name[5] == 'w' && VecWidth == 256)
1837  IID = Intrinsic::x86_avx2_pavg_w;
1838  else if (Name[5] == 'w' && VecWidth == 512)
1839  IID = Intrinsic::x86_avx512_pavg_w_512;
1840  else
1841  llvm_unreachable("Unexpected intrinsic");
1842  } else
1843  return false;
1844 
1845  SmallVector<Value *, 4> Args(CI.args());
1846  Args.pop_back();
1847  Args.pop_back();
1848  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1849  Args);
1850  unsigned NumArgs = CI.arg_size();
1851  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1852  CI.getArgOperand(NumArgs - 2));
1853  return true;
1854 }
1855 
1856 /// Upgrade the comment in a call to inline asm that represents an ObjC
1857 /// retain/release marker.
1858 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1859  size_t Pos;
1860  if (AsmStr->find("mov\tfp") == 0 &&
1861  AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1862  (Pos = AsmStr->find("# marker")) != std::string::npos) {
1863  AsmStr->replace(Pos, 1, ";");
1864  }
1865 }
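// Example (illustrative): given an asm string such as
//   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
// the routine rewrites the '#' that introduces "# marker" to ';', producing
//   "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue"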
1866 
1867 static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
1868  IRBuilder<> &Builder) {
1869  if (Name == "mve.vctp64.old") {
1870  // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
1871  // correct type.
1872  Value *VCTP = Builder.CreateCall(
1873  Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
1874  CI->getArgOperand(0), CI->getName());
1875  Value *C1 = Builder.CreateCall(
1876  Intrinsic::getDeclaration(
1877  F->getParent(), Intrinsic::arm_mve_pred_v2i,
1878  {VectorType::get(Builder.getInt1Ty(), 2, false)}),
1879  VCTP);
1880  return Builder.CreateCall(
1881  Intrinsic::getDeclaration(
1882  F->getParent(), Intrinsic::arm_mve_pred_i2v,
1883  {VectorType::get(Builder.getInt1Ty(), 4, false)}),
1884  C1);
1885  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
1886  Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
1887  Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
1888  Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
1889  Name == "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
1890  Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
1891  Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
1892  Name == "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
1893  Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
1894  Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
1895  Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
1896  Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
1897  Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
1898  Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
1899  std::vector<Type *> Tys;
1900  unsigned ID = CI->getIntrinsicID();
1901  Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
1902  switch (ID) {
1903  case Intrinsic::arm_mve_mull_int_predicated:
1904  case Intrinsic::arm_mve_vqdmull_predicated:
1905  case Intrinsic::arm_mve_vldr_gather_base_predicated:
1906  Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
1907  break;
1908  case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
1909  case Intrinsic::arm_mve_vstr_scatter_base_predicated:
1910  case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
1911  Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
1912  V2I1Ty};
1913  break;
1914  case Intrinsic::arm_mve_vldr_gather_offset_predicated:
1915  Tys = {CI->getType(), CI->getOperand(0)->getType(),
1916  CI->getOperand(1)->getType(), V2I1Ty};
1917  break;
1918  case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
1919  Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
1920  CI->getOperand(2)->getType(), V2I1Ty};
1921  break;
1922  case Intrinsic::arm_cde_vcx1q_predicated:
1923  case Intrinsic::arm_cde_vcx1qa_predicated:
1924  case Intrinsic::arm_cde_vcx2q_predicated:
1925  case Intrinsic::arm_cde_vcx2qa_predicated:
1926  case Intrinsic::arm_cde_vcx3q_predicated:
1927  case Intrinsic::arm_cde_vcx3qa_predicated:
1928  Tys = {CI->getOperand(1)->getType(), V2I1Ty};
1929  break;
1930  default:
1931  llvm_unreachable("Unhandled Intrinsic!");
1932  }
1933 
1934  std::vector<Value *> Ops;
1935  for (Value *Op : CI->args()) {
1936  Type *Ty = Op->getType();
1937  if (Ty->getScalarSizeInBits() == 1) {
1938  Value *C1 = Builder.CreateCall(
1939  Intrinsic::getDeclaration(
1940  F->getParent(), Intrinsic::arm_mve_pred_v2i,
1941  {VectorType::get(Builder.getInt1Ty(), 4, false)}),
1942  Op);
1943  Op = Builder.CreateCall(
1944  Intrinsic::getDeclaration(F->getParent(),
1945  Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
1946  C1);
1947  }
1948  Ops.push_back(Op);
1949  }
1950 
1951  Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
1952  return Builder.CreateCall(Fn, Ops, CI->getName());
1953  }
1954  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
1955 }
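// Example (illustrative sketch; value names are hypothetical): the vctp64
// branch above rewrites
//   %p = call <4 x i1> @llvm.arm.mve.vctp64.old(i32 %n)
// as a native <2 x i1> vctp whose result is converted through the predicate
// intrinsics to recover the legacy <4 x i1> type:
//   %v = call <2 x i1> @llvm.arm.mve.vctp64(i32 %n)
//   %i = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> %v)
//   %p = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %i)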
1956 
1957 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1958 /// provided to seamlessly integrate with existing context.
1959 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
1960  Function *F = CI->getCalledFunction();
1961  LLVMContext &C = CI->getContext();
1962  IRBuilder<> Builder(C);
1963  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1964 
1965  assert(F && "Intrinsic call is not direct?");
1966 
1967  if (!NewFn) {
1968  // Get the Function's name.
1969  StringRef Name = F->getName();
1970 
1971  assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1972  Name = Name.substr(5);
1973 
1974  bool IsX86 = Name.startswith("x86.");
1975  if (IsX86)
1976  Name = Name.substr(4);
1977  bool IsNVVM = Name.startswith("nvvm.");
1978  if (IsNVVM)
1979  Name = Name.substr(5);
1980  bool IsARM = Name.startswith("arm.");
1981  if (IsARM)
1982  Name = Name.substr(4);
1983 
1984  if (IsX86 && Name.startswith("sse4a.movnt.")) {
1985  Module *M = F->getParent();
1986  SmallVector<Metadata *, 1> Elts;
1987  Elts.push_back(
1988  ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1989  MDNode *Node = MDNode::get(C, Elts);
1990 
1991  Value *Arg0 = CI->getArgOperand(0);
1992  Value *Arg1 = CI->getArgOperand(1);
1993 
1994  // Nontemporal (unaligned) store of the 0'th element of the float/double
1995  // vector.
1996  Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1997  PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1998  Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1999  Value *Extract =
2000  Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2001 
2002  StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2003  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
2004 
2005  // Remove intrinsic.
2006  CI->eraseFromParent();
2007  return;
2008  }
2009 
2010  if (IsX86 && (Name.startswith("avx.movnt.") ||
2011  Name.startswith("avx512.storent."))) {
2012  Module *M = F->getParent();
2013  SmallVector<Metadata *, 1> Elts;
2014  Elts.push_back(
2015  ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2016  MDNode *Node = MDNode::get(C, Elts);
2017 
2018  Value *Arg0 = CI->getArgOperand(0);
2019  Value *Arg1 = CI->getArgOperand(1);
2020 
2021  // Convert the type of the pointer to a pointer to the stored type.
2022  Value *BC = Builder.CreateBitCast(Arg0,
2023  PointerType::getUnqual(Arg1->getType()),
2024  "cast");
2025  StoreInst *SI = Builder.CreateAlignedStore(
2026  Arg1, BC,
2027  Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
2028  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
2029 
2030  // Remove intrinsic.
2031  CI->eraseFromParent();
2032  return;
2033  }
2034 
2035  if (IsX86 && Name == "sse2.storel.dq") {
2036  Value *Arg0 = CI->getArgOperand(0);
2037  Value *Arg1 = CI->getArgOperand(1);
2038 
2039  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2040  Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2041  Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2042  Value *BC = Builder.CreateBitCast(Arg0,
2043  PointerType::getUnqual(Elt->getType()),
2044  "cast");
2045  Builder.CreateAlignedStore(Elt, BC, Align(1));
2046 
2047  // Remove intrinsic.
2048  CI->eraseFromParent();
2049  return;
2050  }
2051 
2052  if (IsX86 && (Name.startswith("sse.storeu.") ||
2053  Name.startswith("sse2.storeu.") ||
2054  Name.startswith("avx.storeu."))) {
2055  Value *Arg0 = CI->getArgOperand(0);
2056  Value *Arg1 = CI->getArgOperand(1);
2057 
2058  Arg0 = Builder.CreateBitCast(Arg0,
2059  PointerType::getUnqual(Arg1->getType()),
2060  "cast");
2061  Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2062 
2063  // Remove intrinsic.
2064  CI->eraseFromParent();
2065  return;
2066  }
2067 
2068  if (IsX86 && Name == "avx512.mask.store.ss") {
2069  Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2070  UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2071  Mask, false);
2072 
2073  // Remove intrinsic.
2074  CI->eraseFromParent();
2075  return;
2076  }
2077 
2078  if (IsX86 && (Name.startswith("avx512.mask.store"))) {
2079  // "avx512.mask.storeu." or "avx512.mask.store."
2080  bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2081  UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2082  CI->getArgOperand(2), Aligned);
2083 
2084  // Remove intrinsic.
2085  CI->eraseFromParent();
2086  return;
2087  }
2088 
2089  Value *Rep;
2090  // Upgrade packed integer vector compare intrinsics to compare instructions.
2091  if (IsX86 && (Name.startswith("sse2.pcmp") ||
2092  Name.startswith("avx2.pcmp"))) {
2093  // "sse2.pcmpeq.", "sse2.pcmpgt.", "avx2.pcmpeq.", or "avx2.pcmpgt."
2094  bool CmpEq = Name[9] == 'e';
2095  Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2096  CI->getArgOperand(0), CI->getArgOperand(1));
2097  Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2098  } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
2099  Type *ExtTy = Type::getInt32Ty(C);
2100  if (CI->getOperand(0)->getType()->isIntegerTy(8))
2101  ExtTy = Type::getInt64Ty(C);
2102  unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2103  ExtTy->getPrimitiveSizeInBits();
2104  Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2105  Rep = Builder.CreateVectorSplat(NumElts, Rep);
2106  } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2107  Name == "sse2.sqrt.sd")) {
2108  Value *Vec = CI->getArgOperand(0);
2109  Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2110  Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2111  Intrinsic::sqrt, Elt0->getType());
2112  Elt0 = Builder.CreateCall(Intr, Elt0);
2113  Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2114  } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
2115  Name.startswith("sse2.sqrt.p") ||
2116  Name.startswith("sse.sqrt.p"))) {
2117  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2118  Intrinsic::sqrt,
2119  CI->getType()),
2120  {CI->getArgOperand(0)});
2121  } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
2122  if (CI->arg_size() == 4 &&
2123  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2124  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2125  Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2126  : Intrinsic::x86_avx512_sqrt_pd_512;
2127 
2128  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
2129  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2130  IID), Args);
2131  } else {
2132  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2133  Intrinsic::sqrt,
2134  CI->getType()),
2135  {CI->getArgOperand(0)});
2136  }
2137  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2138  CI->getArgOperand(1));
2139  } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
2140  Name.startswith("avx512.ptestnm"))) {
2141  Value *Op0 = CI->getArgOperand(0);
2142  Value *Op1 = CI->getArgOperand(1);
2143  Value *Mask = CI->getArgOperand(2);
2144  Rep = Builder.CreateAnd(Op0, Op1);
2145  llvm::Type *Ty = Op0->getType();
2146  Value *Zero = llvm::Constant::getNullValue(Ty);
2147  ICmpInst::Predicate Pred =
2148  Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2149  Rep = Builder.CreateICmp(Pred, Rep, Zero);
2150  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
2151  } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
2152  unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2153  ->getNumElements();
2154  Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2155  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2156  CI->getArgOperand(1));
2157  } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
2158  unsigned NumElts = CI->getType()->getScalarSizeInBits();
2159  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2160  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2161  int Indices[64];
2162  for (unsigned i = 0; i != NumElts; ++i)
2163  Indices[i] = i;
2164 
2165  // First extract half of each vector. This gives better codegen than
2166  // doing it in a single shuffle.
2167  LHS = Builder.CreateShuffleVector(LHS, LHS,
2168  makeArrayRef(Indices, NumElts / 2));
2169  RHS = Builder.CreateShuffleVector(RHS, RHS,
2170  makeArrayRef(Indices, NumElts / 2));
2171  // Concat the vectors.
2172  // NOTE: Operands have to be swapped to match intrinsic definition.
2173  Rep = Builder.CreateShuffleVector(RHS, LHS,
2174  makeArrayRef(Indices, NumElts));
2175  Rep = Builder.CreateBitCast(Rep, CI->getType());
2176  } else if (IsX86 && Name == "avx512.kand.w") {
2177  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2178  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2179  Rep = Builder.CreateAnd(LHS, RHS);
2180  Rep = Builder.CreateBitCast(Rep, CI->getType());
2181  } else if (IsX86 && Name == "avx512.kandn.w") {
2182  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2183  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2184  LHS = Builder.CreateNot(LHS);
2185  Rep = Builder.CreateAnd(LHS, RHS);
2186  Rep = Builder.CreateBitCast(Rep, CI->getType());
2187  } else if (IsX86 && Name == "avx512.kor.w") {
2188  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2189  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2190  Rep = Builder.CreateOr(LHS, RHS);
2191  Rep = Builder.CreateBitCast(Rep, CI->getType());
2192  } else if (IsX86 && Name == "avx512.kxor.w") {
2193  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2194  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2195  Rep = Builder.CreateXor(LHS, RHS);
2196  Rep = Builder.CreateBitCast(Rep, CI->getType());
2197  } else if (IsX86 && Name == "avx512.kxnor.w") {
2198  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2199  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2200  LHS = Builder.CreateNot(LHS);
2201  Rep = Builder.CreateXor(LHS, RHS);
2202  Rep = Builder.CreateBitCast(Rep, CI->getType());
2203  } else if (IsX86 && Name == "avx512.knot.w") {
2204  Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2205  Rep = Builder.CreateNot(Rep);
2206  Rep = Builder.CreateBitCast(Rep, CI->getType());
2207  } else if (IsX86 &&
2208  (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2209  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2210  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2211  Rep = Builder.CreateOr(LHS, RHS);
2212  Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2213  Value *C;
2214  if (Name[14] == 'c')
2215  C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2216  else
2217  C = ConstantInt::getNullValue(Builder.getInt16Ty());
2218  Rep = Builder.CreateICmpEQ(Rep, C);
2219  Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2220  } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2221  Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2222  Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2223  Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2224  Type *I32Ty = Type::getInt32Ty(C);
2225  Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2226  ConstantInt::get(I32Ty, 0));
2227  Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2228  ConstantInt::get(I32Ty, 0));
2229  Value *EltOp;
2230  if (Name.contains(".add."))
2231  EltOp = Builder.CreateFAdd(Elt0, Elt1);
2232  else if (Name.contains(".sub."))
2233  EltOp = Builder.CreateFSub(Elt0, Elt1);
2234  else if (Name.contains(".mul."))
2235  EltOp = Builder.CreateFMul(Elt0, Elt1);
2236  else
2237  EltOp = Builder.CreateFDiv(Elt0, Elt1);
2238  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2239  ConstantInt::get(I32Ty, 0));
2240  } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
2241  // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2242  bool CmpEq = Name[16] == 'e';
2243  Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2244  } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
2245  Type *OpTy = CI->getArgOperand(0)->getType();
2246  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2247  Intrinsic::ID IID;
2248  switch (VecWidth) {
2249  default: llvm_unreachable("Unexpected intrinsic");
2250  case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2251  case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2252  case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2253  }
2254 
2255  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2256  { CI->getOperand(0), CI->getArgOperand(1) });
2257  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2258  } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
2259  Type *OpTy = CI->getArgOperand(0)->getType();
2260  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2261  unsigned EltWidth = OpTy->getScalarSizeInBits();
2262  Intrinsic::ID IID;
2263  if (VecWidth == 128 && EltWidth == 32)
2264  IID = Intrinsic::x86_avx512_fpclass_ps_128;
2265  else if (VecWidth == 256 && EltWidth == 32)
2266  IID = Intrinsic::x86_avx512_fpclass_ps_256;
2267  else if (VecWidth == 512 && EltWidth == 32)
2268  IID = Intrinsic::x86_avx512_fpclass_ps_512;
2269  else if (VecWidth == 128 && EltWidth == 64)
2270  IID = Intrinsic::x86_avx512_fpclass_pd_128;
2271  else if (VecWidth == 256 && EltWidth == 64)
2272  IID = Intrinsic::x86_avx512_fpclass_pd_256;
2273  else if (VecWidth == 512 && EltWidth == 64)
2274  IID = Intrinsic::x86_avx512_fpclass_pd_512;
2275  else
2276  llvm_unreachable("Unexpected intrinsic");
2277 
2278  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2279  { CI->getOperand(0), CI->getArgOperand(1) });
2280  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2281  } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
2282  SmallVector<Value *, 4> Args(CI->args());
2283  Type *OpTy = Args[0]->getType();
2284  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2285  unsigned EltWidth = OpTy->getScalarSizeInBits();
2286  Intrinsic::ID IID;
2287  if (VecWidth == 128 && EltWidth == 32)
2288  IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2289  else if (VecWidth == 256 && EltWidth == 32)
2290  IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2291  else if (VecWidth == 512 && EltWidth == 32)
2292  IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2293  else if (VecWidth == 128 && EltWidth == 64)
2294  IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2295  else if (VecWidth == 256 && EltWidth == 64)
2296  IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2297  else if (VecWidth == 512 && EltWidth == 64)
2298  IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2299  else
2300  llvm_unreachable("Unexpected intrinsic");
2301 
2302  Value *Mask = Constant::getAllOnesValue(CI->getType());
2303  if (VecWidth == 512)
2304  std::swap(Mask, Args.back());
2305  Args.push_back(Mask);
2306 
2307  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2308  Args);
2309  } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
2310  // Integer compare intrinsics.
2311  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2312  Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2313  } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2314  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2315  Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2316  } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2317  Name.startswith("avx512.cvtw2mask.") ||
2318  Name.startswith("avx512.cvtd2mask.") ||
2319  Name.startswith("avx512.cvtq2mask."))) {
2320  Value *Op = CI->getArgOperand(0);
2321  Value *Zero = llvm::Constant::getNullValue(Op->getType());
2322  Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2323  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2324  } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2325  Name == "ssse3.pabs.w.128" ||
2326  Name == "ssse3.pabs.d.128" ||
2327  Name.startswith("avx2.pabs") ||
2328  Name.startswith("avx512.mask.pabs"))) {
2329  Rep = upgradeAbs(Builder, *CI);
2330  } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2331  Name == "sse2.pmaxs.w" ||
2332  Name == "sse41.pmaxsd" ||
2333  Name.startswith("avx2.pmaxs") ||
2334  Name.startswith("avx512.mask.pmaxs"))) {
2335  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2336  } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2337  Name == "sse41.pmaxuw" ||
2338  Name == "sse41.pmaxud" ||
2339  Name.startswith("avx2.pmaxu") ||
2340  Name.startswith("avx512.mask.pmaxu"))) {
2341  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2342  } else if (IsX86 && (Name == "sse41.pminsb" ||
2343  Name == "sse2.pmins.w" ||
2344  Name == "sse41.pminsd" ||
2345  Name.startswith("avx2.pmins") ||
2346  Name.startswith("avx512.mask.pmins"))) {
2347  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2348  } else if (IsX86 && (Name == "sse2.pminu.b" ||
2349  Name == "sse41.pminuw" ||
2350  Name == "sse41.pminud" ||
2351  Name.startswith("avx2.pminu") ||
2352  Name.startswith("avx512.mask.pminu"))) {
2353  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2354  } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2355  Name == "avx2.pmulu.dq" ||
2356  Name == "avx512.pmulu.dq.512" ||
2357  Name.startswith("avx512.mask.pmulu.dq."))) {
2358  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2359  } else if (IsX86 && (Name == "sse41.pmuldq" ||
2360  Name == "avx2.pmul.dq" ||
2361  Name == "avx512.pmul.dq.512" ||
2362  Name.startswith("avx512.mask.pmul.dq."))) {
2363  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2364  } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2365  Name == "sse2.cvtsi2sd" ||
2366  Name == "sse.cvtsi642ss" ||
2367  Name == "sse2.cvtsi642sd")) {
2368  Rep = Builder.CreateSIToFP(
2369  CI->getArgOperand(1),
2370  cast<VectorType>(CI->getType())->getElementType());
2371  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2372  } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2373  Rep = Builder.CreateUIToFP(
2374  CI->getArgOperand(1),
2375  cast<VectorType>(CI->getType())->getElementType());
2376  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2377  } else if (IsX86 && Name == "sse2.cvtss2sd") {
2378  Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2379  Rep = Builder.CreateFPExt(
2380  Rep, cast<VectorType>(CI->getType())->getElementType());
2381  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2382  } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2383  Name == "sse2.cvtdq2ps" ||
2384  Name == "avx.cvtdq2.pd.256" ||
2385  Name == "avx.cvtdq2.ps.256" ||
2386  Name.startswith("avx512.mask.cvtdq2pd.") ||
2387  Name.startswith("avx512.mask.cvtudq2pd.") ||
2388  Name.startswith("avx512.mask.cvtdq2ps.") ||
2389  Name.startswith("avx512.mask.cvtudq2ps.") ||
2390  Name.startswith("avx512.mask.cvtqq2pd.") ||
2391  Name.startswith("avx512.mask.cvtuqq2pd.") ||
2392  Name == "avx512.mask.cvtqq2ps.256" ||
2393  Name == "avx512.mask.cvtqq2ps.512" ||
2394  Name == "avx512.mask.cvtuqq2ps.256" ||
2395  Name == "avx512.mask.cvtuqq2ps.512" ||
2396  Name == "sse2.cvtps2pd" ||
2397  Name == "avx.cvt.ps2.pd.256" ||
2398  Name == "avx512.mask.cvtps2pd.128" ||
2399  Name == "avx512.mask.cvtps2pd.256")) {
2400  auto *DstTy = cast<FixedVectorType>(CI->getType());
2401  Rep = CI->getArgOperand(0);
2402  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2403 
2404  unsigned NumDstElts = DstTy->getNumElements();
2405  if (NumDstElts < SrcTy->getNumElements()) {
2406  assert(NumDstElts == 2 && "Unexpected vector size");
2407  Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2408  }
2409 
2410  bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2411  bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2412  if (IsPS2PD)
2413  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2414  else if (CI->arg_size() == 4 &&
2415  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2416  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2417  Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2418  : Intrinsic::x86_avx512_sitofp_round;
2419  Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2420  { DstTy, SrcTy });
2421  Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2422  } else {
2423  Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2424  : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2425  }
2426 
2427  if (CI->arg_size() >= 3)
2428  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2429  CI->getArgOperand(1));
2430  } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2431  Name.startswith("vcvtph2ps."))) {
2432  auto *DstTy = cast<FixedVectorType>(CI->getType());
2433  Rep = CI->getArgOperand(0);
2434  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2435  unsigned NumDstElts = DstTy->getNumElements();
2436  if (NumDstElts != SrcTy->getNumElements()) {
2437  assert(NumDstElts == 4 && "Unexpected vector size");
2438  Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2439  }
2440  Rep = Builder.CreateBitCast(
2441  Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2442  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2443  if (CI->arg_size() >= 3)
2444  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2445  CI->getArgOperand(1));
2446  } else if (IsX86 && Name.startswith("avx512.mask.load")) {
2447  // "avx512.mask.loadu." or "avx512.mask.load."
2448  bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2449  Rep =
2450  UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2451  CI->getArgOperand(2), Aligned);
2452  } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2453  auto *ResultTy = cast<FixedVectorType>(CI->getType());
2454  Type *PtrTy = ResultTy->getElementType();
2455 
2456  // Cast the pointer to element type.
2457  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2458  llvm::PointerType::getUnqual(PtrTy));
2459 
2460  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2461  ResultTy->getNumElements());
2462 
2463  Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2464  Intrinsic::masked_expandload,
2465  ResultTy);
2466  Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2467  } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2468  auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2469  Type *PtrTy = ResultTy->getElementType();
2470 
2471  // Cast the pointer to element type.
2472  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2473  llvm::PointerType::getUnqual(PtrTy));
2474 
2475  Value *MaskVec =
2476  getX86MaskVec(Builder, CI->getArgOperand(2),
2477  cast<FixedVectorType>(ResultTy)->getNumElements());
2478 
2479  Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2480  Intrinsic::masked_compressstore,
2481  ResultTy);
2482  Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2483  } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2484  Name.startswith("avx512.mask.expand."))) {
2485  auto *ResultTy = cast<FixedVectorType>(CI->getType());
2486 
2487  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2488  ResultTy->getNumElements());
2489 
2490  bool IsCompress = Name[12] == 'c';
2491  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2492  : Intrinsic::x86_avx512_mask_expand;
2493  Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2494  Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2495  MaskVec });
2496  } else if (IsX86 && Name.startswith("xop.vpcom")) {
2497  bool IsSigned;
2498  if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2499  Name.endswith("uq"))
2500  IsSigned = false;
2501  else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2502  Name.endswith("q"))
2503  IsSigned = true;
2504  else
2505  llvm_unreachable("Unknown suffix");
2506 
2507  unsigned Imm;
2508  if (CI->arg_size() == 3) {
2509  Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2510  } else {
2511  Name = Name.substr(9); // strip off "xop.vpcom"
2512  if (Name.startswith("lt"))
2513  Imm = 0;
2514  else if (Name.startswith("le"))
2515  Imm = 1;
2516  else if (Name.startswith("gt"))
2517  Imm = 2;
2518  else if (Name.startswith("ge"))
2519  Imm = 3;
2520  else if (Name.startswith("eq"))
2521  Imm = 4;
2522  else if (Name.startswith("ne"))
2523  Imm = 5;
2524  else if (Name.startswith("false"))
2525  Imm = 6;
2526  else if (Name.startswith("true"))
2527  Imm = 7;
2528  else
2529  llvm_unreachable("Unknown condition");
2530  }
2531 
2532  Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2533  } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2534  Value *Sel = CI->getArgOperand(2);
2535  Value *NotSel = Builder.CreateNot(Sel);
2536  Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2537  Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2538  Rep = Builder.CreateOr(Sel0, Sel1);
2539  } else if (IsX86 && (Name.startswith("xop.vprot") ||
2540  Name.startswith("avx512.prol") ||
2541  Name.startswith("avx512.mask.prol"))) {
2542  Rep = upgradeX86Rotate(Builder, *CI, false);
2543  } else if (IsX86 && (Name.startswith("avx512.pror") ||
2544  Name.startswith("avx512.mask.pror"))) {
2545  Rep = upgradeX86Rotate(Builder, *CI, true);
2546  } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2547  Name.startswith("avx512.mask.vpshld") ||
2548  Name.startswith("avx512.maskz.vpshld"))) {
2549  bool ZeroMask = Name[11] == 'z';
2550  Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2551  } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2552  Name.startswith("avx512.mask.vpshrd") ||
2553  Name.startswith("avx512.maskz.vpshrd"))) {
2554  bool ZeroMask = Name[11] == 'z';
2555  Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2556  } else if (IsX86 && Name == "sse42.crc32.64.8") {
2557  Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2558  Intrinsic::x86_sse42_crc32_32_8);
2559  Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2560  Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2561  Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2562  } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2563  Name.startswith("avx512.vbroadcast.s"))) {
2564  // Replace broadcasts with a series of insertelements.
2565  auto *VecTy = cast<FixedVectorType>(CI->getType());
2566  Type *EltTy = VecTy->getElementType();
2567  unsigned EltNum = VecTy->getNumElements();
2568  Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2569  EltTy->getPointerTo());
2570  Value *Load = Builder.CreateLoad(EltTy, Cast);
2571  Type *I32Ty = Type::getInt32Ty(C);
2572  Rep = PoisonValue::get(VecTy);
2573  for (unsigned I = 0; I < EltNum; ++I)
2574  Rep = Builder.CreateInsertElement(Rep, Load,
2575  ConstantInt::get(I32Ty, I));
2576  } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2577  Name.startswith("sse41.pmovzx") ||
2578  Name.startswith("avx2.pmovsx") ||
2579  Name.startswith("avx2.pmovzx") ||
2580  Name.startswith("avx512.mask.pmovsx") ||
2581  Name.startswith("avx512.mask.pmovzx"))) {
2582  auto *DstTy = cast<FixedVectorType>(CI->getType());
2583  unsigned NumDstElts = DstTy->getNumElements();
2584 
2585  // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2586  SmallVector<int, 8> ShuffleMask(NumDstElts);
2587  for (unsigned i = 0; i != NumDstElts; ++i)
2588  ShuffleMask[i] = i;
2589 
2590  Value *SV =
2591  Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2592 
2593  bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2594  Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2595  : Builder.CreateZExt(SV, DstTy);
2596  // If there are 3 arguments, it's a masked intrinsic so we need a select.
2597  if (CI->arg_size() == 3)
2598  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2599  CI->getArgOperand(1));
2600  } else if (Name == "avx512.mask.pmov.qd.256" ||
2601  Name == "avx512.mask.pmov.qd.512" ||
2602  Name == "avx512.mask.pmov.wb.256" ||
2603  Name == "avx512.mask.pmov.wb.512") {
2604  Type *Ty = CI->getArgOperand(1)->getType();
2605  Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2606  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2607  CI->getArgOperand(1));
2608  } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2609  Name == "avx2.vbroadcasti128")) {
2610  // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2611  Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2612  unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2613  auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2614  Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2615  PointerType::getUnqual(VT));
2616  Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2617  if (NumSrcElts == 2)
2618  Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2619  else
2620  Rep = Builder.CreateShuffleVector(
2621  Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2622  } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2623  Name.startswith("avx512.mask.shuf.f"))) {
2624  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2625  Type *VT = CI->getType();
2626  unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2627  unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2628  unsigned ControlBitsMask = NumLanes - 1;
2629  unsigned NumControlBits = NumLanes / 2;
2630  SmallVector<int, 8> ShuffleMask(0);
2631 
2632  for (unsigned l = 0; l != NumLanes; ++l) {
2633  unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2634  // We actually need the other source.
2635  if (l >= NumLanes / 2)
2636  LaneMask += NumLanes;
2637  for (unsigned i = 0; i != NumElementsInLane; ++i)
2638  ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2639  }
2640  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2641  CI->getArgOperand(1), ShuffleMask);
2642  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2643  CI->getArgOperand(3));
2644  } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2645  Name.startswith("avx512.mask.broadcasti"))) {
2646  unsigned NumSrcElts =
2647  cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2648  ->getNumElements();
2649  unsigned NumDstElts =
2650  cast<FixedVectorType>(CI->getType())->getNumElements();
2651 
2652  SmallVector<int, 8> ShuffleMask(NumDstElts);
2653  for (unsigned i = 0; i != NumDstElts; ++i)
2654  ShuffleMask[i] = i % NumSrcElts;
2655 
2656  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2657  CI->getArgOperand(0),
2658  ShuffleMask);
2659  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2660  CI->getArgOperand(1));
2661  } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2662  Name.startswith("avx2.vbroadcast") ||
2663  Name.startswith("avx512.pbroadcast") ||
2664  Name.startswith("avx512.mask.broadcast.s"))) {
2665  // Replace vp?broadcasts with a vector shuffle.
2666  Value *Op = CI->getArgOperand(0);
2667  ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2668  Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2669  SmallVector<int, 8> M;
2670  ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
2671  Rep = Builder.CreateShuffleVector(Op, M);
2672 
2673  if (CI->arg_size() == 3)
2674  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2675  CI->getArgOperand(1));
2676  } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2677  Name.startswith("avx2.padds.") ||
2678  Name.startswith("avx512.padds.") ||
2679  Name.startswith("avx512.mask.padds."))) {
2680  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2681  } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
2682  Name.startswith("avx2.psubs.") ||
2683  Name.startswith("avx512.psubs.") ||
2684  Name.startswith("avx512.mask.psubs."))) {
2685  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2686  } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2687  Name.startswith("avx2.paddus.") ||
2688  Name.startswith("avx512.mask.paddus."))) {
2689  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2690  } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
2691  Name.startswith("avx2.psubus.") ||
2692  Name.startswith("avx512.mask.psubus."))) {
2693  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2694  } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2695  Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2696  CI->getArgOperand(1),
2697  CI->getArgOperand(2),
2698  CI->getArgOperand(3),
2699  CI->getArgOperand(4),
2700  false);
2701  } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2702  Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2703  CI->getArgOperand(1),
2704  CI->getArgOperand(2),
2705  CI->getArgOperand(3),
2706  CI->getArgOperand(4),
2707  true);
2708  } else if (IsX86 && (Name == "sse2.psll.dq" ||
2709  Name == "avx2.psll.dq")) {
2710  // 128/256-bit shift left specified in bits.
2711  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2712  Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2713  Shift / 8); // Shift is in bits.
2714  } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2715  Name == "avx2.psrl.dq")) {
2716  // 128/256-bit shift right specified in bits.
2717  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2718  Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2719  Shift / 8); // Shift is in bits.
2720  } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2721  Name == "avx2.psll.dq.bs" ||
2722  Name == "avx512.psll.dq.512")) {
2723  // 128/256/512-bit shift left specified in bytes.
2724  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2725  Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2726  } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2727  Name == "avx2.psrl.dq.bs" ||
2728  Name == "avx512.psrl.dq.512")) {
2729  // 128/256/512-bit shift right specified in bytes.
2730  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2731  Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2732  } else if (IsX86 && (Name == "sse41.pblendw" ||
2733  Name.startswith("sse41.blendp") ||
2734  Name.startswith("avx.blend.p") ||
2735  Name == "avx2.pblendw" ||
2736  Name.startswith("avx2.pblendd."))) {
2737  Value *Op0 = CI->getArgOperand(0);
2738  Value *Op1 = CI->getArgOperand(1);
2739  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2740  auto *VecTy = cast<FixedVectorType>(CI->getType());
2741  unsigned NumElts = VecTy->getNumElements();
2742 
2743  SmallVector<int, 16> Idxs(NumElts);
2744  for (unsigned i = 0; i != NumElts; ++i)
2745  Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2746 
2747  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2748  } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2749  Name == "avx2.vinserti128" ||
2750  Name.startswith("avx512.mask.insert"))) {
2751  Value *Op0 = CI->getArgOperand(0);
2752  Value *Op1 = CI->getArgOperand(1);
2753  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2754  unsigned DstNumElts =
2755  cast<FixedVectorType>(CI->getType())->getNumElements();
2756  unsigned SrcNumElts =
2757  cast<FixedVectorType>(Op1->getType())->getNumElements();
2758  unsigned Scale = DstNumElts / SrcNumElts;
2759 
2760  // Mask off the high bits of the immediate value; hardware ignores those.
2761  Imm = Imm % Scale;
2762 
2763  // Extend the second operand into a vector the size of the destination.
2764  SmallVector<int, 8> Idxs(DstNumElts);
2765  for (unsigned i = 0; i != SrcNumElts; ++i)
2766  Idxs[i] = i;
2767  for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2768  Idxs[i] = SrcNumElts;
2769  Rep = Builder.CreateShuffleVector(Op1, Idxs);
2770 
2771  // Insert the second operand into the first operand.
2772 
2773  // Note that there is no guarantee that instruction lowering will actually
2774  // produce a vinsertf128 instruction for the created shuffles. In
2775  // particular, the 0 immediate case involves no lane changes, so it can
2776  // be handled as a blend.
2777 
2778  // Example of shuffle mask for 32-bit elements:
2779  // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2780  // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
2781 
2782  // First fill with the identity mask.
2783  for (unsigned i = 0; i != DstNumElts; ++i)
2784  Idxs[i] = i;
2785  // Then replace the elements where we need to insert.
2786  for (unsigned i = 0; i != SrcNumElts; ++i)
2787  Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2788  Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2789 
2790  // If the intrinsic has a mask operand, handle that.
2791  if (CI->arg_size() == 5)
2792  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2793  CI->getArgOperand(3));
2794  } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2795  Name == "avx2.vextracti128" ||
2796  Name.startswith("avx512.mask.vextract"))) {
2797  Value *Op0 = CI->getArgOperand(0);
2798  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2799  unsigned DstNumElts =
2800  cast<FixedVectorType>(CI->getType())->getNumElements();
2801  unsigned SrcNumElts =
2802  cast<FixedVectorType>(Op0->getType())->getNumElements();
2803  unsigned Scale = SrcNumElts / DstNumElts;
2804 
2805  // Mask off the high bits of the immediate value; hardware ignores those.
2806  Imm = Imm % Scale;
2807 
2808  // Get indexes for the subvector of the input vector.
2809  SmallVector<int, 8> Idxs(DstNumElts);
2810  for (unsigned i = 0; i != DstNumElts; ++i) {
2811  Idxs[i] = i + (Imm * DstNumElts);
2812  }
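  // E.g. extracting the upper <4 x i32> half of an <8 x i32> source
  // (Imm = 1) gives Idxs = <4,5,6,7>.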
2813  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2814 
2815  // If the intrinsic has a mask operand, handle that.
2816  if (CI->arg_size() == 4)
2817  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2818  CI->getArgOperand(2));
2819  } else if (!IsX86 && Name == "stackprotectorcheck") {
2820  Rep = nullptr;
2821  } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2822  Name.startswith("avx512.mask.perm.di."))) {
2823  Value *Op0 = CI->getArgOperand(0);
2824  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2825  auto *VecTy = cast<FixedVectorType>(CI->getType());
2826  unsigned NumElts = VecTy->getNumElements();
2827 
2828  SmallVector<int, 8> Idxs(NumElts);
2829  for (unsigned i = 0; i != NumElts; ++i)
2830  Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2831 
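  // Each 2-bit field of the immediate selects an element within a group of
  // four; e.g. Imm = 0x1B (0b00011011) yields Idxs = <3,2,1,0>, reversing
  // each group.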
2832  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2833 
2834  if (CI->arg_size() == 4)
2835  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2836  CI->getArgOperand(2));
2837  } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2838  Name == "avx2.vperm2i128")) {
2839  // The immediate permute control byte looks like this:
2840  // [1:0] - select 128 bits from sources for low half of destination
2841  // [2] - ignore
2842  // [3] - zero low half of destination
2843  // [5:4] - select 128 bits from sources for high half of destination
2844  // [6] - ignore
2845  // [7] - zero high half of destination
2846 
2847  uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2848 
2849  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2850  unsigned HalfSize = NumElts / 2;
2851  SmallVector<int, 8> ShuffleMask(NumElts);
2852 
2853  // Determine which operand(s) are actually in use for this instruction.
2854  Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2855  Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2856 
2857  // If needed, replace operands based on zero mask.
2858  V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2859  V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2860 
2861  // Permute low half of result.
2862  unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2863  for (unsigned i = 0; i < HalfSize; ++i)
2864  ShuffleMask[i] = StartIndex + i;
2865 
2866  // Permute high half of result.
2867  StartIndex = (Imm & 0x10) ? HalfSize : 0;
2868  for (unsigned i = 0; i < HalfSize; ++i)
2869  ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2870 
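  // E.g. for <8 x float> with Imm = 0x21: the low half of the result is the
  // high half of the first source and the high half of the result is the
  // low half of the second source, i.e. ShuffleMask = <4,5,6,7,8,9,10,11>.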
2871  Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2872 
2873  } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2874  Name == "sse2.pshuf.d" ||
2875  Name.startswith("avx512.mask.vpermil.p") ||
2876  Name.startswith("avx512.mask.pshuf.d."))) {
2877  Value *Op0 = CI->getArgOperand(0);
2878  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2879  auto *VecTy = cast<FixedVectorType>(CI->getType());
2880  unsigned NumElts = VecTy->getNumElements();
2881  // Calculate the size of each index in the immediate.
2882  unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2883  unsigned IdxMask = ((1 << IdxSize) - 1);
2884 
2885  SmallVector<int, 8> Idxs(NumElts);
2886  // Look up the bits for this element, wrapping around the immediate every
2887  // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
2888  // to offset by the first index of each group.
2889  for (unsigned i = 0; i != NumElts; ++i)
2890  Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2891 
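  // E.g. for sse2.pshuf.d (32-bit elements, IdxSize == 2) with Imm = 0x1B,
  // this produces Idxs = <3,2,1,0>, a full reversal of each 128-bit lane.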
2892  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2893 
2894  if (CI->arg_size() == 4)
2895  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2896  CI->getArgOperand(2));
2897  } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2898  Name.startswith("avx512.mask.pshufl.w."))) {
2899  Value *Op0 = CI->getArgOperand(0);
2900  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2901  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2902 
2903  SmallVector<int, 16> Idxs(NumElts);
2904  for (unsigned l = 0; l != NumElts; l += 8) {
2905  for (unsigned i = 0; i != 4; ++i)
2906  Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2907  for (unsigned i = 4; i != 8; ++i)
2908  Idxs[i + l] = i + l;
2909  }
2910 
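  // E.g. Imm = 0x1B reverses the low four words of each 128-bit lane and
  // leaves the high four in place: Idxs = <3,2,1,0,4,5,6,7,...>.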
2911  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2912 
2913  if (CI->arg_size() == 4)
2914  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2915  CI->getArgOperand(2));
2916  } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2917  Name.startswith("avx512.mask.pshufh.w."))) {
2918  Value *Op0 = CI->getArgOperand(0);
2919  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2920  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2921 
2922  SmallVector<int, 16> Idxs(NumElts);
2923  for (unsigned l = 0; l != NumElts; l += 8) {
2924  for (unsigned i = 0; i != 4; ++i)
2925  Idxs[i + l] = i + l;
2926  for (unsigned i = 0; i != 4; ++i)
2927  Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2928  }
2929 
2930  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2931 
2932  if (CI->arg_size() == 4)
2933  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2934  CI->getArgOperand(2));
2935  } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2936  Value *Op0 = CI->getArgOperand(0);
2937  Value *Op1 = CI->getArgOperand(1);
2938  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2939  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2940 
2941  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2942  unsigned HalfLaneElts = NumLaneElts / 2;
2943 
2944  SmallVector<int, 16> Idxs(NumElts);
2945  for (unsigned i = 0; i != NumElts; ++i) {
2946  // Base index is the starting element of the lane.
2947  Idxs[i] = i - (i % NumLaneElts);
2948  // If we are halfway through the lane, switch to the other source.
2949  if ((i % NumLaneElts) >= HalfLaneElts)
2950  Idxs[i] += NumElts;
2951  // Now select the specific element by adding HalfLaneElts bits from
2952  // the immediate, wrapping around the immediate every 8 bits.
2953  Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2954  }
2955 
2956  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2957 
2958  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2959  CI->getArgOperand(3));
2960  } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2961  Name.startswith("avx512.mask.movshdup") ||
2962  Name.startswith("avx512.mask.movsldup"))) {
2963  Value *Op0 = CI->getArgOperand(0);
2964  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2965  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2966 
2967  unsigned Offset = 0;
2968  if (Name.startswith("avx512.mask.movshdup."))
2969  Offset = 1;
2970 
2971  SmallVector<int, 16> Idxs(NumElts);
2972  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2973  for (unsigned i = 0; i != NumLaneElts; i += 2) {
2974  Idxs[i + l + 0] = i + l + Offset;
2975  Idxs[i + l + 1] = i + l + Offset;
2976  }
2977 
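  // E.g. for <4 x float>, movsldup (Offset == 0) duplicates the even
  // elements, giving Idxs = <0,0,2,2>, while movshdup (Offset == 1) gives
  // Idxs = <1,1,3,3>.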
2978  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2979 
2980  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2981  CI->getArgOperand(1));
2982  } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2983  Name.startswith("avx512.mask.unpckl."))) {
2984  Value *Op0 = CI->getArgOperand(0);
2985  Value *Op1 = CI->getArgOperand(1);
2986  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2987  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2988 
2989  SmallVector<int, 64> Idxs(NumElts);
2990  for (int l = 0; l != NumElts; l += NumLaneElts)
2991  for (int i = 0; i != NumLaneElts; ++i)
2992  Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2993 
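  // E.g. for <4 x i32> this interleaves the low halves of the two sources:
  // Idxs = <0,4,1,5>.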
2994  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2995 
2996  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2997  CI->getArgOperand(2));
2998  } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2999  Name.startswith("avx512.mask.unpckh."))) {
3000  Value *Op0 = CI->getArgOperand(0);
3001  Value *Op1 = CI->getArgOperand(1);
3002  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3003  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3004 
3005  SmallVector<int, 64> Idxs(NumElts);
3006  for (int l = 0; l != NumElts; l += NumLaneElts)
3007  for (int i = 0; i != NumLaneElts; ++i)
3008  Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3009 
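  // E.g. for <4 x i32> this interleaves the high halves of the two sources:
  // Idxs = <2,6,3,7>.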
3010  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3011 
3012  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3013  CI->getArgOperand(2));
3014  } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
3015  Name.startswith("avx512.mask.pand."))) {
3016  VectorType *FTy = cast<VectorType>(CI->getType());
3017  VectorType *ITy = VectorType::getInteger(FTy);
3018  Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3019  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3020  Rep = Builder.CreateBitCast(Rep, FTy);
3021  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3022  CI->getArgOperand(2));
3023  } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
3024  Name.startswith("avx512.mask.pandn."))) {
3025  VectorType *FTy = cast<VectorType>(CI->getType());
3026  VectorType *ITy = VectorType::getInteger(FTy);
3027  Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3028  Rep = Builder.CreateAnd(Rep,
3029  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3030  Rep = Builder.CreateBitCast(Rep, FTy);
3031  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3032  CI->getArgOperand(2));
3033  } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
3034  Name.startswith("avx512.mask.por."))) {
3035  VectorType *FTy = cast<VectorType>(CI->getType());
3036  VectorType *ITy = VectorType::getInteger(FTy);
3037  Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3038  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3039  Rep = Builder.CreateBitCast(Rep, FTy);
3040  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3041  CI->getArgOperand(2));
3042  } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
3043  Name.startswith("avx512.mask.pxor."))) {
3044  VectorType *FTy = cast<VectorType>(CI->getType());
3045  VectorType *ITy = VectorType::getInteger(FTy);
3046  Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3047  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3048  Rep = Builder.CreateBitCast(Rep, FTy);
3049  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3050  CI->getArgOperand(2));
3051  } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
3052  Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3053  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3054  CI->getArgOperand(2));
3055  } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
3056  Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3057  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3058  CI->getArgOperand(2));
3059  } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
3060  Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3061  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3062  CI->getArgOperand(2));
3063  } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
3064  if (Name.endswith(".512")) {
3065  Intrinsic::ID IID;
3066  if (Name[17] == 's')
3067  IID = Intrinsic::x86_avx512_add_ps_512;
3068  else
3069  IID = Intrinsic::x86_avx512_add_pd_512;
3070 
3071  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3072  { CI->getArgOperand(0), CI->getArgOperand(1),
3073  CI->getArgOperand(4) });
3074  } else {
3075  Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3076  }
3077  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3078  CI->getArgOperand(2));
3079  } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
3080  if (Name.endswith(".512")) {
3081  Intrinsic::ID IID;
3082  if (Name[17] == 's')
3083  IID = Intrinsic::x86_avx512_div_ps_512;
3084  else
3085  IID = Intrinsic::x86_avx512_div_pd_512;
3086 
3087  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3088  { CI->getArgOperand(0), CI->getArgOperand(1),
3089  CI->getArgOperand(4) });
3090  } else {
3091  Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3092  }
3093  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3094  CI->getArgOperand(2));
3095  } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
3096  if (Name.endswith(".512")) {
3097  Intrinsic::ID IID;
3098  if (Name[17] == 's')
3099  IID = Intrinsic::x86_avx512_mul_ps_512;
3100  else
3101  IID = Intrinsic::x86_avx512_mul_pd_512;
3102 
3103  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3104  { CI->getArgOperand(0), CI->getArgOperand(1),
3105  CI->getArgOperand(4) });
3106  } else {
3107  Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3108  }
3109  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3110  CI->getArgOperand(2));
3111  } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
3112  if (Name.endswith(".512")) {
3113  Intrinsic::ID IID;
3114  if (Name[17] == 's')
3115  IID = Intrinsic::x86_avx512_sub_ps_512;
3116  else
3117  IID = Intrinsic::x86_avx512_sub_pd_512;
3118 
3119  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3120  { CI->getArgOperand(0), CI->getArgOperand(1),
3121  CI->getArgOperand(4) });
3122  } else {
3123  Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3124  }
3125  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3126  CI->getArgOperand(2));
3127  } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
3128  Name.startswith("avx512.mask.min.p")) &&
3129  Name.drop_front(18) == ".512") {
3130  bool IsDouble = Name[17] == 'd';
3131  bool IsMin = Name[13] == 'i';
3132  static const Intrinsic::ID MinMaxTbl[2][2] = {
3133  { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
3134  { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
3135  };
3136  Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3137 
3138  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3139  { CI->getArgOperand(0), CI->getArgOperand(1),
3140  CI->getArgOperand(4) });
3141  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3142  CI->getArgOperand(2));
3143  } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
3144  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
3145  Intrinsic::ctlz,
3146  CI->getType()),
3147  { CI->getArgOperand(0), Builder.getInt1(false) });
3148  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3149  CI->getArgOperand(1));
3150  } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
3151  bool IsImmediate = Name[16] == 'i' ||
3152  (Name.size() > 18 && Name[18] == 'i');
3153  bool IsVariable = Name[16] == 'v';
3154  char Size = Name[16] == '.' ? Name[17] :
3155  Name[17] == '.' ? Name[18] :
3156  Name[18] == '.' ? Name[19] :
3157  Name[20];
3158 
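  // E.g. for "avx512.mask.psll.di.512": Name[16] is '.', so Size is
  // Name[17] ('d'), and Name[18] ('i') marks the immediate form, which
  // selects x86_avx512_pslli_d_512 below.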
3159  Intrinsic::ID IID;
3160  if (IsVariable && Name[17] != '.') {
3161  if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3162  IID = Intrinsic::x86_avx2_psllv_q;
3163  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3164  IID = Intrinsic::x86_avx2_psllv_q_256;
3165  else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3166  IID = Intrinsic::x86_avx2_psllv_d;
3167  else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3168  IID = Intrinsic::x86_avx2_psllv_d_256;
3169  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3170  IID = Intrinsic::x86_avx512_psllv_w_128;
3171  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3172  IID = Intrinsic::x86_avx512_psllv_w_256;
3173  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3174  IID = Intrinsic::x86_avx512_psllv_w_512;
3175  else
3176  llvm_unreachable("Unexpected size");
3177  } else if (Name.endswith(".128")) {
3178  if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3179  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3180  : Intrinsic::x86_sse2_psll_d;
3181  else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3182  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3183  : Intrinsic::x86_sse2_psll_q;
3184  else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3185  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3186  : Intrinsic::x86_sse2_psll_w;
3187  else
3188  llvm_unreachable("Unexpected size");
3189  } else if (Name.endswith(".256")) {
3190  if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3191  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3192  : Intrinsic::x86_avx2_psll_d;
3193  else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3194  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3195  : Intrinsic::x86_avx2_psll_q;
3196  else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3197  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3198  : Intrinsic::x86_avx2_psll_w;
3199  else
3200  llvm_unreachable("Unexpected size");
3201  } else {
3202  if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3203  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3204  IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
3205  Intrinsic::x86_avx512_psll_d_512;
3206  else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3207  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3208  IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
3209  Intrinsic::x86_avx512_psll_q_512;
3210  else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3211  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3212  : Intrinsic::x86_avx512_psll_w_512;
3213  else
3214  llvm_unreachable("Unexpected size");
3215  }
3216 
3217  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3218  } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
3219  bool IsImmediate = Name[16] == 'i' ||
3220  (Name.size() > 18 && Name[18] == 'i');
3221  bool IsVariable = Name[16] == 'v';
3222  char Size = Name[16] == '.' ? Name[17] :
3223  Name[17] == '.' ? Name[18] :
3224  Name[18] == '.' ? Name[19] :
3225  Name[20];
3226 
3227  Intrinsic::ID IID;
3228  if (IsVariable && Name[17] != '.') {
3229  if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3230  IID = Intrinsic::x86_avx2_psrlv_q;
3231  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3232  IID = Intrinsic::x86_avx2_psrlv_q_256;
3233  else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3234  IID = Intrinsic::x86_avx2_psrlv_d;
3235  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3236  IID = Intrinsic::x86_avx2_psrlv_d_256;
3237  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3238  IID = Intrinsic::x86_avx512_psrlv_w_128;
3239  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3240  IID = Intrinsic::x86_avx512_psrlv_w_256;
3241  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3242  IID = Intrinsic::x86_avx512_psrlv_w_512;
3243  else
3244  llvm_unreachable("Unexpected size");
3245  } else if (Name.endswith(".128")) {
3246  if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3247  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3248  : Intrinsic::x86_sse2_psrl_d;
3249  else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3250  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3251  : Intrinsic::x86_sse2_psrl_q;
3252  else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3253  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3254  : Intrinsic::x86_sse2_psrl_w;
3255  else
3256  llvm_unreachable("Unexpected size");
3257  } else if (Name.endswith(".256")) {
3258  if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3259  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3260  : Intrinsic::x86_avx2_psrl_d;
3261  else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3262  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3263  : Intrinsic::x86_avx2_psrl_q;
3264  else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3265  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3266  : Intrinsic::x86_avx2_psrl_w;
3267  else
3268  llvm_unreachable("Unexpected size");
3269  } else {
3270  if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3271  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3272  IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
3273  Intrinsic::x86_avx512_psrl_d_512;
3274  else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3275  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3276  IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
3277  Intrinsic::x86_avx512_psrl_q_512;
3278  else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3279  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3280  : Intrinsic::x86_avx512_psrl_w_512;
3281  else
3282  llvm_unreachable("Unexpected size");
3283  }
3284 
3285  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3286  } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
3287  bool IsImmediate = Name[16] == 'i' ||
3288  (Name.size() > 18 && Name[18] == 'i');
3289  bool IsVariable = Name[16] == 'v';
3290  char Size = Name[16] == '.' ? Name[17] :
3291  Name[17] == '.' ? Name[18] :
3292  Name[18] == '.' ? Name[19] :
3293  Name[20];
3294 
3295  Intrinsic::ID IID;
3296  if (IsVariable && Name[17] != '.') {
3297  if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3298  IID = Intrinsic::x86_avx2_psrav_d;
3299  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3300  IID = Intrinsic::x86_avx2_psrav_d_256;
3301  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3302  IID = Intrinsic::x86_avx512_psrav_w_128;
3303  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3304  IID = Intrinsic::x86_avx512_psrav_w_256;
3305  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3306  IID = Intrinsic::x86_avx512_psrav_w_512;
3307  else
3308  llvm_unreachable("Unexpected size");
3309  } else if (Name.endswith(".128")) {
3310  if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3311  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3312  : Intrinsic::x86_sse2_psra_d;
3313  else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3314  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3315  IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
3316  Intrinsic::x86_avx512_psra_q_128;
3317  else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3318  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3319  : Intrinsic::x86_sse2_psra_w;
3320  else
3321  llvm_unreachable("Unexpected size");
3322  } else if (Name.endswith(".256")) {
3323  if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3324  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3325  : Intrinsic::x86_avx2_psra_d;
3326  else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3327  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3328  IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
3329  Intrinsic::x86_avx512_psra_q_256;
3330  else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3331  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3332  : Intrinsic::x86_avx2_psra_w;
3333  else
3334  llvm_unreachable("Unexpected size");
3335  } else {
3336  if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3337  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3338  IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
3339  Intrinsic::x86_avx512_psra_d_512;
3340  else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3341  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3342  IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
3343  Intrinsic::x86_avx512_psra_q_512;
3344  else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3345  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3346  : Intrinsic::x86_avx512_psra_w_512;
3347  else
3348  llvm_unreachable("Unexpected size");
3349  }
3350 
3351  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3352  } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3353  Rep = upgradeMaskedMove(Builder, *CI);
3354  } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3355  Rep = UpgradeMaskToInt(Builder, *CI);
3356  } else if (IsX86 && Name.endswith(".movntdqa")) {
3357  Module *M = F->getParent();
3358  MDNode *Node = MDNode::get(
3359  C, ConstantAsMetadata::get(Builder.getInt32(1)));
3360 
3361  Value *Ptr = CI->getArgOperand(0);
3362 
3363  // Convert the type of the pointer to a pointer to the stored type.
3364  Value *BC = Builder.CreateBitCast(
3365  Ptr, PointerType::getUnqual(CI->getType()), "cast");
3366  LoadInst *LI = Builder.CreateAlignedLoad(
3367  CI->getType(), BC,
3368  Align(CI->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
3369  LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3370  Rep = LI;
3371  } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3372  Name.startswith("fma.vfmsub.") ||
3373  Name.startswith("fma.vfnmadd.") ||
3374  Name.startswith("fma.vfnmsub."))) {
3375  bool NegMul = Name[6] == 'n';
3376  bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3377  bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3378 
3379  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3380  CI->getArgOperand(2) };
3381 
3382  if (IsScalar) {
3383  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3384  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3385  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3386  }
3387 
3388  if (NegMul && !IsScalar)
3389  Ops[0] = Builder.CreateFNeg(Ops[0]);
3390  if (NegMul && IsScalar)
3391  Ops[1] = Builder.CreateFNeg(Ops[1]);
3392  if (NegAcc)
3393  Ops[2] = Builder.CreateFNeg(Ops[2]);
3394 
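  // E.g. fma.vfnmsub computes -(a*b) - c, expressed here as fma(-a, b, -c);
  // the scalar form negates the extracted element of the second operand
  // instead, which is mathematically equivalent.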
3395  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3396  Intrinsic::fma,
3397  Ops[0]->getType()),
3398  Ops);
3399 
3400  if (IsScalar)
3401  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3402  (uint64_t)0);
3403  } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3404  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3405  CI->getArgOperand(2) };
3406 
3407  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3408  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3409  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3410 
3411  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3412  Intrinsic::fma,
3413  Ops[0]->getType()),
3414  Ops);
3415 
3416  Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3417  Rep, (uint64_t)0);
3418  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3419  Name.startswith("avx512.maskz.vfmadd.s") ||
3420  Name.startswith("avx512.mask3.vfmadd.s") ||
3421  Name.startswith("avx512.mask3.vfmsub.s") ||
3422  Name.startswith("avx512.mask3.vfnmsub.s"))) {
3423  bool IsMask3 = Name[11] == '3';
3424  bool IsMaskZ = Name[11] == 'z';
3425  // Drop the "avx512.mask." prefix to make the indexing below simpler.
3426  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3427  bool NegMul = Name[2] == 'n';
3428  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3429 
3430  Value *A = CI->getArgOperand(0);
3431  Value *B = CI->getArgOperand(1);
3432  Value *C = CI->getArgOperand(2);
3433 
3434  if (NegMul && (IsMask3 || IsMaskZ))
3435  A = Builder.CreateFNeg(A);
3436  if (NegMul && !(IsMask3 || IsMaskZ))
3437  B = Builder.CreateFNeg(B);
3438  if (NegAcc)
3439  C = Builder.CreateFNeg(C);
3440 
3441  A = Builder.CreateExtractElement(A, (uint64_t)0);
3442  B = Builder.CreateExtractElement(B, (uint64_t)0);
3443  C = Builder.CreateExtractElement(C, (uint64_t)0);
3444 
3445  if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3446  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3447  Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3448 
3449  Intrinsic::ID IID;
3450  if (Name.back() == 'd')
3451  IID = Intrinsic::x86_avx512_vfmadd_f64;
3452  else
3453  IID = Intrinsic::x86_avx512_vfmadd_f32;
3454  Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3455  Rep = Builder.CreateCall(FMA, Ops);
3456  } else {
3457  Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3458  Intrinsic::fma,
3459  A->getType());
3460  Rep = Builder.CreateCall(FMA, { A, B, C });
3461  }
3462 
3463  Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3464  IsMask3 ? C : A;
3465 
3466  // For Mask3 with NegAcc, we need to create a new extractelement that
3467  // avoids the negation above.
3468  if (NegAcc && IsMask3)
3469  PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3470  (uint64_t)0);
3471 
3473  Rep, PassThru);
3474  Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3475  Rep, (uint64_t)0);
3476  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3477  Name.startswith("avx512.mask.vfnmadd.p") ||
3478  Name.startswith("avx512.mask.vfnmsub.p") ||
3479  Name.startswith("avx512.mask3.vfmadd.p") ||
3480  Name.startswith("avx512.mask3.vfmsub.p") ||
3481  Name.startswith("avx512.mask3.vfnmsub.p") ||
3482  Name.startswith("avx512.maskz.vfmadd.p"))) {
3483  bool IsMask3 = Name[11] == '3';
3484  bool IsMaskZ = Name[11] == 'z';
3485  // Drop the "avx512.mask." prefix to make the indexing below simpler.
3486  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3487  bool NegMul = Name[2] == 'n';
3488  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3489 
3490  Value *A = CI->getArgOperand(0);
3491  Value *B = CI->getArgOperand(1);
3492  Value *C = CI->getArgOperand(2);
3493 
3494  if (NegMul && (IsMask3 || IsMaskZ))
3495  A = Builder.CreateFNeg(A);
3496  if (NegMul && !(IsMask3 || IsMaskZ))
3497  B = Builder.CreateFNeg(B);
3498  if (NegAcc)
3499  C = Builder.CreateFNeg(C);
3500 
3501  if (CI->arg_size() == 5 &&
3502  (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3503  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3504  Intrinsic::ID IID;
3505  // Check the character before ".512" in the name.
3506  if (Name[Name.size()-5] == 's')
3507  IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3508  else
3509  IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3510 
3511  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3512  { A, B, C, CI->getArgOperand(4) });
3513  } else {
3514  Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3515  Intrinsic::fma,
3516  A->getType());
3517  Rep = Builder.CreateCall(FMA, { A, B, C });
3518  }
3519 
3520  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3521  IsMask3 ? CI->getArgOperand(2) :
3522  CI->getArgOperand(0);
3523 
3524  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3525  } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
3526  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3527  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3528  Intrinsic::ID IID;
3529  if (VecWidth == 128 && EltWidth == 32)
3530  IID = Intrinsic::x86_fma_vfmaddsub_ps;
3531  else if (VecWidth == 256 && EltWidth == 32)
3532  IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3533  else if (VecWidth == 128 && EltWidth == 64)
3534  IID = Intrinsic::x86_fma_vfmaddsub_pd;
3535  else if (VecWidth == 256 && EltWidth == 64)
3536  IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3537  else
3538  llvm_unreachable("Unexpected intrinsic");
3539 
3540  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3541  CI->getArgOperand(2) };
3542  Ops[2] = Builder.CreateFNeg(Ops[2]);
3543  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3544  Ops);
3545  } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3546  Name.startswith("avx512.mask3.vfmaddsub.p") ||
3547  Name.startswith("avx512.maskz.vfmaddsub.p") ||
3548  Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3549  bool IsMask3 = Name[11] == '3';
3550  bool IsMaskZ = Name[11] == 'z';
3551  // Drop the "avx512.mask." prefix to make the indexing below simpler.
3552  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3553  bool IsSubAdd = Name[3] == 's';
3554  if (CI->arg_size() == 5) {
3555  Intrinsic::ID IID;
3556  // Check the character before ".512" in the name.
3557  if (Name[Name.size()-5] == 's')
3558  IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3559  else
3560  IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3561 
3562  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3563  CI->getArgOperand(2), CI->getArgOperand(4) };
3564  if (IsSubAdd)
3565  Ops[2] = Builder.CreateFNeg(Ops[2]);
3566 
3567  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3568  Ops);
3569  } else {
3570  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3571 
3572  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3573  CI->getArgOperand(2) };
3574 
3575  Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3576  Ops[0]->getType());
3577  Value *Odd = Builder.CreateCall(FMA, Ops);
3578  Ops[2] = Builder.CreateFNeg(Ops[2]);
3579  Value *Even = Builder.CreateCall(FMA, Ops);
3580 
3581  if (IsSubAdd)
3582  std::swap(Even, Odd);
3583 
3584  SmallVector<int, 32> Idxs(NumElts);
3585  for (int i = 0; i != NumElts; ++i)
3586  Idxs[i] = i + (i % 2) * NumElts;
3587 
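  // E.g. for <4 x float> the mask is <0,5,2,7>: even result lanes come from
  // Even (a*b - c) and odd lanes from Odd (a*b + c), matching the fmaddsub
  // semantics.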
3588  Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3589  }
3590 
3591  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3592  IsMask3 ? CI->getArgOperand(2) :
3593  CI->getArgOperand(0);
3594 
3595  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3596  } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3597  Name.startswith("avx512.maskz.pternlog."))) {
3598  bool ZeroMask = Name[11] == 'z';
3599  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3600  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3601  Intrinsic::ID IID;
3602  if (VecWidth == 128 && EltWidth == 32)
3603  IID = Intrinsic::x86_avx512_pternlog_d_128;
3604  else if (VecWidth == 256 && EltWidth == 32)
3605  IID = Intrinsic::x86_avx512_pternlog_d_256;
3606  else if (VecWidth == 512 && EltWidth == 32)
3607  IID = Intrinsic::x86_avx512_pternlog_d_512;
3608  else if (VecWidth == 128 && EltWidth == 64)
3609  IID = Intrinsic::x86_avx512_pternlog_q_128;
3610  else if (VecWidth == 256 && EltWidth == 64)
3611  IID = Intrinsic::x86_avx512_pternlog_q_256;
3612  else if (VecWidth == 512 && EltWidth == 64)
3613  IID = Intrinsic::x86_avx512_pternlog_q_512;
3614  else
3615  llvm_unreachable("Unexpected intrinsic");
3616 
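  // The trailing i32 immediate (operand 3) is an 8-entry truth table over
  // the three source bits; e.g. an immediate of 0xCA computes the bitwise
  // select A ? B : C.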
3617  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3618  CI->getArgOperand(2), CI->getArgOperand(3) };
3619  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3620  Args);
3621  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3622  : CI->getArgOperand(0);
3623  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3624  } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3625  Name.startswith("avx512.maskz.vpmadd52"))) {
3626  bool ZeroMask = Name[11] == 'z';
3627  bool High = Name[20] == 'h' || Name[21] == 'h';
3628  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3629  Intrinsic::ID IID;
3630  if (VecWidth == 128 && !High)
3631  IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3632  else if (VecWidth == 256 && !High)
3633  IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3634  else if (VecWidth == 512 && !High)
3635  IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3636  else if (VecWidth == 128 && High)
3637  IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3638  else if (VecWidth == 256 && High)
3639  IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3640  else if (VecWidth == 512 && High)
3641  IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3642  else
3643  llvm_unreachable("Unexpected intrinsic");
3644 
3645  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3646  CI->getArgOperand(2) };
3647  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3648  Args);
3649  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3650  : CI->getArgOperand(0);
3651  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3652  } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3653  Name.startswith("avx512.mask.vpermt2var.") ||
3654  Name.startswith("avx512.maskz.vpermt2var."))) {
3655  bool ZeroMask = Name[11] == 'z';
3656  bool IndexForm = Name[17] == 'i';
3657  Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3658  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3659  Name.startswith("avx512.maskz.vpdpbusd.") ||
3660  Name.startswith("avx512.mask.vpdpbusds.") ||
3661  Name.startswith("avx512.maskz.vpdpbusds."))) {
3662  bool ZeroMask = Name[11] == 'z';
3663  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3664  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3665  Intrinsic::ID IID;
3666  if (VecWidth == 128 && !IsSaturating)
3667  IID = Intrinsic::x86_avx512_vpdpbusd_128;
3668  else if (VecWidth == 256 && !IsSaturating)
3669  IID = Intrinsic::x86_avx512_vpdpbusd_256;
3670  else if (VecWidth == 512 && !IsSaturating)
3671  IID = Intrinsic::x86_avx512_vpdpbusd_512;
3672  else if (VecWidth == 128 && IsSaturating)
3673  IID = Intrinsic::x86_avx512_vpdpbusds_128;
3674  else if (VecWidth == 256 && IsSaturating)
3675  IID = Intrinsic::x86_avx512_vpdpbusds_256;
3676  else if (VecWidth == 512 && IsSaturating)
3677  IID = Intrinsic::x86_avx512_vpdpbusds_512;
3678  else
3679  llvm_unreachable("Unexpected intrinsic");
3680 
3681  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3682  CI->getArgOperand(2) };
3683  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3684  Args);
3685  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3686  : CI->getArgOperand(0);
3687  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3688  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3689  Name.startswith("avx512.maskz.vpdpwssd.") ||
3690  Name.startswith("avx512.mask.vpdpwssds.") ||
3691  Name.startswith("avx512.maskz.vpdpwssds."))) {
3692  bool ZeroMask = Name[11] == 'z';
3693  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3694  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3695  Intrinsic::ID IID;
3696  if (VecWidth == 128 && !IsSaturating)
3697  IID = Intrinsic::x86_avx512_vpdpwssd_128;
3698  else if (VecWidth == 256 && !IsSaturating)
3699  IID = Intrinsic::x86_avx512_vpdpwssd_256;
3700  else if (VecWidth == 512 && !IsSaturating)
3701  IID = Intrinsic::x86_avx512_vpdpwssd_512;
3702  else if (VecWidth == 128 && IsSaturating)
3703  IID = Intrinsic::x86_avx512_vpdpwssds_128;
3704  else if (VecWidth == 256 && IsSaturating)
3705  IID = Intrinsic::x86_avx512_vpdpwssds_256;
3706  else if (VecWidth == 512 && IsSaturating)
3707  IID = Intrinsic::x86_avx512_vpdpwssds_512;
3708  else
3709  llvm_unreachable("Unexpected intrinsic");
3710 
3711  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3712  CI->getArgOperand(2) };
3713  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3714  Args);
3715  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3716  : CI->getArgOperand(0);
3717  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3718  } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3719  Name == "addcarry.u32" || Name == "addcarry.u64" ||
3720  Name == "subborrow.u32" || Name == "subborrow.u64")) {
3721  Intrinsic::ID IID;
3722  if (Name[0] == 'a' && Name.back() == '2')
3723  IID = Intrinsic::x86_addcarry_32;
3724  else if (Name[0] == 'a' && Name.back() == '4')
3725  IID = Intrinsic::x86_addcarry_64;
3726  else if (Name[0] == 's' && Name.back() == '2')
3727  IID = Intrinsic::x86_subborrow_32;
3728  else if (Name[0] == 's' && Name.back() == '4')
3729  IID = Intrinsic::x86_subborrow_64;
3730  else
3731  llvm_unreachable("Unexpected intrinsic");
3732 
3733  // Make a call with 3 operands.
3734  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3735  CI->getArgOperand(2)};
3736  Value *NewCall = Builder.CreateCall(
3737  Intrinsic::getDeclaration(CI->getModule(), IID),
3738  Args);
3739 
3740  // Extract the second result and store it.
3741  Value *Data = Builder.CreateExtractValue(NewCall, 1);
3742  // Cast the pointer to the right type.
3743  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3744  llvm::PointerType::getUnqual(Data->getType()));
3745  Builder.CreateAlignedStore(Data, Ptr, Align(1));
3746  // Replace the original call result with the first result of the new call.
3747  Value *CF = Builder.CreateExtractValue(NewCall, 0);
3748 
3749  CI->replaceAllUsesWith(CF);
3750  Rep = nullptr;
3751  } else if (IsX86 && Name.startswith("avx512.mask.") &&
3752  upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3753  // Rep will be updated by the call in the condition.
3754  } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3755  Value *Arg = CI->getArgOperand(0);
3756  Value *Neg = Builder.CreateNeg(Arg, "neg");
3757  Value *Cmp = Builder.CreateICmpSGE(
3758  Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3759  Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3760  } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3761  Name.startswith("atomic.load.add.f64.p"))) {
3762  Value *Ptr = CI->getArgOperand(0);
3763  Value *Val = CI->getArgOperand(1);
3764  Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
3765  AtomicOrdering::SequentiallyConsistent);
3766  } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3767  Name == "max.ui" || Name == "max.ull")) {
3768  Value *Arg0 = CI->getArgOperand(0);
3769  Value *Arg1 = CI->getArgOperand(1);
3770  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3771  ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3772  : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3773  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3774  } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3775  Name == "min.ui" || Name == "min.ull")) {
3776  Value *Arg0 = CI->getArgOperand(0);
3777  Value *Arg1 = CI->getArgOperand(1);
3778  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3779  ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3780  : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3781  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3782  } else if (IsNVVM && Name == "clz.ll") {
3783  // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3784  Value *Arg = CI->getArgOperand(0);
3785  Value *Ctlz = Builder.CreateCall(
3786  Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3787  {Arg->getType()}),
3788  {Arg, Builder.getFalse()}, "ctlz");
3789  Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3790  } else if (IsNVVM && Name == "popc.ll") {
3791  // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
3792  // i64.
3793  Value *Arg = CI->getArgOperand(0);
3794  Value *Popc = Builder.CreateCall(
3795  Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3796  {Arg->getType()}),
3797  Arg, "ctpop");
3798  Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3799  } else if (IsNVVM && Name == "h2f") {
3800  Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3801  F->getParent(), Intrinsic::convert_from_fp16,
3802  {Builder.getFloatTy()}),
3803  CI->getArgOperand(0), "h2f");
3804  } else if (IsARM) {
3805  Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
3806  } else {
3807  llvm_unreachable("Unknown function for CallBase upgrade.");
3808  }
3809 
3810  if (Rep)
3811  CI->replaceAllUsesWith(Rep);
3812  CI->eraseFromParent();
3813  return;
3814  }
3815 
3816  const auto &DefaultCase = [&]() -> void {
3817  if (CI->getFunctionType() == NewFn->getFunctionType()) {
3818  // Handle generic mangling change.
3819  assert(
3820  (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3821  "Unknown function for CallBase upgrade and isn't just a name change");
3822  CI->setCalledFunction(NewFn);
3823  return;
3824  }
3825 
3826  // This must be an upgrade from a named to a literal struct.
3827  auto *OldST = cast<StructType>(CI->getType());
3828  assert(OldST != NewFn->getReturnType() && "Return type must have changed");
3829  assert(OldST->getNumElements() ==
3830  cast<StructType>(NewFn->getReturnType())->getNumElements() &&
3831  "Must have same number of elements");
3832 
3833  SmallVector<Value *, 4> Args(CI->args());
3834  Value *NewCI = Builder.CreateCall(NewFn, Args);
3835  Value *Res = PoisonValue::get(OldST);
3836  for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
3837  Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
3838  Res = Builder.CreateInsertValue(Res, Elem, Idx);
3839  }
3840  CI->replaceAllUsesWith(Res);
3841  CI->eraseFromParent();
3842  return;
3843  };
3844  CallInst *NewCall = nullptr;
3845  switch (NewFn->getIntrinsicID()) {
3846  default: {
3847  DefaultCase();
3848  return;
3849  }
3850  case Intrinsic::arm_neon_vst1:
3851  case Intrinsic::arm_neon_vst2:
3852  case Intrinsic::arm_neon_vst3:
3853  case Intrinsic::arm_neon_vst4:
3854  case Intrinsic::arm_neon_vst2lane:
3855  case Intrinsic::arm_neon_vst3lane:
3856  case Intrinsic::arm_neon_vst4lane: {
3857  SmallVector<Value *, 4> Args(CI->args());
3858  NewCall = Builder.CreateCall(NewFn, Args);
3859  break;
3860  }
3861 
3862  case Intrinsic::arm_neon_bfdot:
3863  case Intrinsic::arm_neon_bfmmla:
3864  case Intrinsic::arm_neon_bfmlalb:
3865  case Intrinsic::arm_neon_bfmlalt:
3866  case Intrinsic::aarch64_neon_bfdot:
3867  case Intrinsic::aarch64_neon_bfmmla:
3868  case Intrinsic::aarch64_neon_bfmlalb:
3869  case Intrinsic::aarch64_neon_bfmlalt: {
3870  SmallVector<Value *, 3> Args;
3871  assert(CI->arg_size() == 3 &&
3872  "Mismatch between function args and call args");
3873  size_t OperandWidth =
3874  CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
3875  assert((OperandWidth == 64 || OperandWidth == 128) &&
3876  "Unexpected operand width");
3877  Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
3878  auto Iter = CI->args().begin();
3879  Args.push_back(*Iter++);
3880  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3881  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3882  NewCall = Builder.CreateCall(NewFn, Args);
3883  break;
3884  }
3885 
3886  case Intrinsic::bitreverse:
3887  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3888  break;
3889 
3890  case Intrinsic::ctlz:
3891  case Intrinsic::cttz:
3892  assert(CI->arg_size() == 1 &&
3893  "Mismatch between function args and call args");
3894  NewCall =
3895  Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3896  break;
3897 
3898  case Intrinsic::objectsize: {
3899  Value *NullIsUnknownSize =
3900  CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
3901  Value *Dynamic =
3902  CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3903  NewCall = Builder.CreateCall(
3904  NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3905  break;
3906  }
3907 
3908  case Intrinsic::ctpop:
3909  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3910  break;
3911 
3912  case Intrinsic::convert_from_fp16:
3913  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3914  break;
3915 
3916  case Intrinsic::dbg_value:
3917  // Upgrade from the old version that had an extra offset argument.
3918  assert(CI->arg_size() == 4);
3919  // Drop nonzero offsets instead of attempting to upgrade them.
3920  if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3921  if (Offset->isZeroValue()) {
3922  NewCall = Builder.CreateCall(
3923  NewFn,
3924  {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3925  break;
3926  }
3927  CI->eraseFromParent();
3928  return;
3929 
3930  case Intrinsic::ptr_annotation:
3931  // Upgrade from versions that lacked the annotation attribute argument.
3932  if (CI->arg_size() != 4) {
3933  DefaultCase();
3934  return;
3935  }
3936 
3937  // Create a new call with an added null annotation attribute argument.
3938  NewCall = Builder.CreateCall(
3939  NewFn,
3940  {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3941  CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3942  NewCall->takeName(CI);
3943  CI->replaceAllUsesWith(NewCall);
3944  CI->eraseFromParent();
3945  return;
3946 
3947  case Intrinsic::var_annotation:
3948  // Upgrade from versions that lacked the annotation attribute argument.
3949  assert(CI->arg_size() == 4 &&
3950  "Before LLVM 12.0 this intrinsic took four arguments");
3951  // Create a new call with an added null annotation attribute argument.
3952  NewCall = Builder.CreateCall(
3953  NewFn,
3954  {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3955  CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3956  CI->eraseFromParent();
3957  return;
3958 
3959  case Intrinsic::x86_xop_vfrcz_ss:
3960  case Intrinsic::x86_xop_vfrcz_sd:
3961  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3962  break;
3963 
3964  case Intrinsic::x86_xop_vpermil2pd:
3965  case Intrinsic::x86_xop_vpermil2ps:
3966  case Intrinsic::x86_xop_vpermil2pd_256:
3967  case Intrinsic::x86_xop_vpermil2ps_256: {
3968  SmallVector<Value *, 4> Args(CI->args());
3969  VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3970  VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3971  Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3972  NewCall = Builder.CreateCall(NewFn, Args);
3973  break;
3974  }
3975 
3976  case Intrinsic::x86_sse41_ptestc:
3977  case Intrinsic::x86_sse41_ptestz:
3978  case Intrinsic::x86_sse41_ptestnzc: {
3979  // The arguments for these intrinsics used to be v4f32, and changed
3980  // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3981  // So, the only thing required is a bitcast for both arguments.
3982  // First, check the arguments have the old type.
3983  Value *Arg0 = CI->getArgOperand(0);
3984  if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
3985  return;
3986 
3987  // Old intrinsic, add bitcasts
3988  Value *Arg1 = CI->getArgOperand(1);
3989 
3990  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
3991 
3992  Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3993  Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3994 
3995  NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3996  break;
3997  }
3998 
3999  case Intrinsic::x86_rdtscp: {
4000  // This used to take one argument. If the call has no arguments, it has
4001  // already been upgraded.
4002  if (CI->getNumOperands() == 0)
4003  return;
4004 
4005  NewCall = Builder.CreateCall(NewFn);
4006  // Extract the second result and store it.
4007  Value *Data = Builder.CreateExtractValue(NewCall, 1);
4008  // Cast the pointer to the right type.
4009  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
4010  llvm::PointerType::getUnqual(Data->getType()));
4011  Builder.CreateAlignedStore(Data, Ptr, Align(1));
4012  // Replace the original call result with the first result of the new call.
4013  Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4014 
4015  NewCall->takeName(CI);
4016  CI->replaceAllUsesWith(TSC);
4017  CI->eraseFromParent();
4018  return;
4019  }
4020 
4021  case Intrinsic::x86_sse41_insertps:
4022  case Intrinsic::x86_sse41_dppd:
4023  case Intrinsic::x86_sse41_dpps:
4024  case Intrinsic::x86_sse41_mpsadbw:
4025  case Intrinsic::x86_avx_dp_ps_256:
4026  case Intrinsic::x86_avx2_mpsadbw: {
4027  // Need to truncate the last argument from i32 to i8 -- this argument models
4028  // an inherently 8-bit immediate operand to these x86 instructions.
4029  SmallVector<Value *, 4> Args(CI->args());
4030 
4031  // Replace the last argument with a trunc.
4032  Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4033  NewCall = Builder.CreateCall(NewFn, Args);
4034  break;
4035  }
4036 
4037  case Intrinsic::x86_avx512_mask_cmp_pd_128:
4038  case Intrinsic::x86_avx512_mask_cmp_pd_256:
4039  case Intrinsic::x86_avx512_mask_cmp_pd_512:
4040  case Intrinsic::x86_avx512_mask_cmp_ps_128:
4041  case Intrinsic::x86_avx512_mask_cmp_ps_256:
4042  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4043  SmallVector<Value *, 4> Args(CI->args());
4044  unsigned NumElts =
4045  cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4046  Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4047 
4048  NewCall = Builder.CreateCall(NewFn, Args);
4049  Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4050 
4051  NewCall->takeName(CI);
4052  CI->replaceAllUsesWith(Res);
4053  CI->eraseFromParent();
4054  return;
4055  }
4056 
4057  case Intrinsic::thread_pointer: {
4058  NewCall = Builder.CreateCall(NewFn, {});
4059  break;
4060  }
4061 
4062  case Intrinsic::invariant_start:
4063  case Intrinsic::invariant_end: {
4064  SmallVector<Value *, 4> Args(CI->args());
4065  NewCall = Builder.CreateCall(NewFn, Args);
4066  break;
4067  }
4068  case Intrinsic::masked_load:
4069  case Intrinsic::masked_store:
4070  case Intrinsic::masked_gather:
4071  case Intrinsic::masked_scatter: {
4072  SmallVector<Value *, 4> Args(CI->args());
4073  NewCall = Builder.CreateCall(NewFn, Args);
4074  NewCall->copyMetadata(*CI);
4075  break;
4076  }
4077 
4078  case Intrinsic::memcpy:
4079  case Intrinsic::memmove:
4080  case Intrinsic::memset: {
4081  // We have to make sure that the call signature is what we're expecting.
4082  // We only want to change the old signatures by removing the alignment arg:
4083  // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4084  // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4085  // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4086  // -> @llvm.memset...(i8*, i8, i[32|64], i1)
4087  // Note: i8*'s in the above can be any pointer type
4088  if (CI->arg_size() != 5) {
4089  DefaultCase();
4090  return;
4091  }
4092  // Remove alignment argument (3), and add alignment attributes to the
4093  // dest/src pointers.
4094  Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4095  CI->getArgOperand(2), CI->getArgOperand(4)};
4096  NewCall = Builder.CreateCall(NewFn, Args);
4097  AttributeList OldAttrs = CI->getAttributes();
4098  AttributeList NewAttrs = AttributeList::get(
4099  C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4100  {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4101  OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4102  NewCall->setAttributes(NewAttrs);
4103  auto *MemCI = cast<MemIntrinsic>(NewCall);
4104  // All mem intrinsics support dest alignment.
4105  const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4106  MemCI->setDestAlignment(Align->getMaybeAlignValue());
4107  // Memcpy/Memmove also support source alignment.
4108  if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4109  MTI->setSourceAlignment(Align->getMaybeAlignValue());
4110  break;
4111  }
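// Editor's note (illustrative sketch): the net effect of this case is
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 4, i1 false)
//     -> call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %d, i8* align 4 %s, i64 %n, i1 false)
// i.e. the explicit i32 alignment operand becomes align attributes on the
// pointer arguments (dest for memset, dest and src for memcpy/memmove).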
4112  }
4113  assert(NewCall && "Should have either set this variable or returned through "
4114  "the default case");
4115  NewCall->takeName(CI);
4116  CI->replaceAllUsesWith(NewCall);
4117  CI->eraseFromParent();
4118 }
4119 
4120 void llvm::UpgradeCallsToIntrinsic(Function *F) {
4121  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4122 
4123  // Check if this function should be upgraded and get the replacement function
4124  // if there is one.
4125  Function *NewFn;
4126  if (UpgradeIntrinsicFunction(F, NewFn)) {
4127  // Replace all users of the old function with the new function or new
4128  // instructions. Not a range-based for loop: each visited call is erased.
4129  for (User *U : make_early_inc_range(F->users()))
4130  if (CallBase *CB = dyn_cast<CallBase>(U))
4131  UpgradeIntrinsicCall(CB, NewFn);
4132 
4133  // Remove old function, no longer used, from the module.
4134  F->eraseFromParent();
4135  }
4136 }
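// Editor's note: a minimal usage sketch under assumed conditions; the loop
// shape here is hypothetical, not taken from a particular caller:
//   for (Function &F : llvm::make_early_inc_range(M.functions()))
//     if (F.isDeclaration() && F.isIntrinsic())
//       UpgradeCallsToIntrinsic(&F);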
4137 
4138 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4139  // Check if the tag uses struct-path aware TBAA format.
4140  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
4141  return &MD;
4142 
4143  auto &Context = MD.getContext();
4144  if (MD.getNumOperands() == 3) {
4145  Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4146  MDNode *ScalarType = MDNode::get(Context, Elts);
4147  // Create an MDNode <ScalarType, ScalarType, offset 0, const>
4148  Metadata *Elts2[] = {ScalarType, ScalarType,
4149  ConstantAsMetadata::get(
4150  Constant::getNullValue(Type::getInt64Ty(Context))),
4151  MD.getOperand(2)};
4152  return MDNode::get(Context, Elts2);
4153  }
4154  // Create an MDNode <MD, MD, offset 0>
4155  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4156  Type::getInt64Ty(Context)))};
4157  return MDNode::get(Context, Elts);
4158 }
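// Editor's note (illustrative sketch): an old two-operand scalar TBAA tag
//   !0 = !{!"int", !1}
// is wrapped into the struct-path form with itself as both base and access
// type at offset 0:
//   !2 = !{!0, !0, i64 0}
// A three-operand old tag additionally carries its const flag over as the
// fourth operand of the new node.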
4159 
4160 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4161  Instruction *&Temp) {
4162  if (Opc != Instruction::BitCast)
4163  return nullptr;
4164 
4165  Temp = nullptr;
4166  Type *SrcTy = V->getType();
4167  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4168  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4169  LLVMContext &Context = V->getContext();
4170 
4171  // We have no information about target data layout, so we assume that
4172  // the maximum pointer size is 64 bits.
4173  Type *MidTy = Type::getInt64Ty(Context);
4174  Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4175 
4176  return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4177  }
4178 
4179  return nullptr;
4180 }
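// Editor's note (illustrative sketch): a bitcast between pointers in
// different address spaces, e.g.
//   %q = bitcast i8* %p to i8 addrspace(1)*
// is split into a ptrtoint/inttoptr pair through i64:
//   %tmp = ptrtoint i8* %p to i64
//   %q   = inttoptr i64 %tmp to i8 addrspace(1)*
// Temp receives the intermediate ptrtoint so the caller can insert both
// instructions.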
4181 
4182 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4183  if (Opc != Instruction::BitCast)
4184  return nullptr;
4185 
4186  Type *SrcTy = C->getType();
4187  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4188  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4189  LLVMContext &Context = C->getContext();
4190 
4191  // We have no information about target data layout, so we assume that
4192  // the maximum pointer size is 64 bits.
4193  Type *MidTy = Type::getInt64Ty(Context);
4194 
4195  return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4196  DestTy);
4197  }
4198 
4199  return nullptr;
4200 }
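// Editor's note: the constant-expression analogue of UpgradeBitCastInst
// above; e.g. bitcast (i8* @g to i8 addrspace(1)*) is rewritten as
// inttoptr (i64 ptrtoint (i8* @g to i64) to i8 addrspace(1)*).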
4201 
4202 /// Check the debug info version number; if it is out-dated, drop the debug
4203 /// info. Return true if the module is modified.
4204 bool llvm::UpgradeDebugInfo(Module &M) {
4205  unsigned Version = getDebugMetadataVersionFromModule(M);
4206  if (Version == DEBUG_METADATA_VERSION) {
4207  bool BrokenDebugInfo = false;
4208  if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))