LLVM  14.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DIBuilder.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/InstVisitor.h"
24 #include "llvm/IR/Instruction.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/Intrinsics.h"
27 #include "llvm/IR/IntrinsicsAArch64.h"
28 #include "llvm/IR/IntrinsicsARM.h"
29 #include "llvm/IR/IntrinsicsX86.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Verifier.h"
34 #include "llvm/Support/Regex.h"
35 #include <cstring>
36 using namespace llvm;
37 
// Renames GV by appending ".old" to its current name. The upgrade helpers in
// this file call it on an old declaration right before requesting the
// replacement declaration.
38 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
39 
// NOTE(review): the listing jumps from line 41 to 43, so the line carrying this
// function's name and leading parameters is missing. The body uses F and IID,
// and later call sites pass (F, IID, NewFn) to UpgradePTESTIntrinsic, which is
// presumably this function -- confirm against the upstream file.
40 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
41 // changed their type from v4f32 to v2i64.
// Returns false, leaving NewFn untouched, when the first parameter is not a
// fixed vector of 4 floats. Otherwise F is renamed, NewFn is set to the
// declaration of IID in F's module, and true is returned.
43  Function *&NewFn) {
44  // Check whether this is an old version of the function, which received
45  // v4f32 arguments.
 // NOTE(review): parameter 0 is read without checking that F has parameters.
46  Type *Arg0Type = F->getFunctionType()->getParamType(0);
47  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
48  return false;
49 
50  // Yes, it's old, replace it with new version.
51  rename(F);
52  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
53  return true;
54 }
55 
// NOTE(review): the listing jumps from line 57 to 59, so the line carrying this
// function's name and leading parameters is missing. Later call sites pass
// (F, IID, NewFn) to UpgradeX86IntrinsicsWith8BitMask, which is presumably this
// function -- confirm against the upstream file.
56 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
57 // arguments have changed their type from i32 to i8.
// Returns false, leaving NewFn untouched, when the last parameter is not i32.
// Otherwise F is renamed, NewFn is set to the declaration of IID in F's
// module, and true is returned.
59  Function *&NewFn) {
60  // Check that the last argument is an i32.
 // NOTE(review): getNumParams() - 1 is computed without checking for a
 // parameterless F.
61  Type *LastArgType = F->getFunctionType()->getParamType(
62  F->getFunctionType()->getNumParams() - 1);
63  if (!LastArgType->isIntegerTy(32))
64  return false;
65 
66  // Move this function aside and map down.
67  rename(F);
68  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
69  return true;
70 }
71 
// NOTE(review): the listing jumps from line 73 to 75, so the line carrying this
// function's name and leading parameters is missing. Later call sites pass
// (F, IID, NewFn) to UpgradeX86MaskedFPCompare, which is presumably this
// function -- confirm against the upstream file.
72 // Upgrade the declaration of fp compare intrinsics that change return type
73 // from scalar to vXi1 mask.
// Returns false, leaving NewFn untouched, when F already returns a vector.
// Otherwise F is renamed, NewFn is set to the declaration of IID in F's
// module, and true is returned.
75  Function *&NewFn) {
76  // Check if the return type is a vector.
 // A vector return type is the new form, so there is nothing to upgrade.
77  if (F->getReturnType()->isVectorTy())
78  return false;
79 
80  rename(F);
81  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
82  return true;
83 }
84 
// NOTE(review): the listing jumps from line 84 to 86, so this function's whole
// signature line is missing; its name is not visible here (presumably
// ShouldUpgradeX86Intrinsic -- confirm against the upstream file).
// Returns true when Name is equal to, or starts with, one of the strings listed
// below, and false otherwise. The listed strings carry no "x86." prefix.
86  // All of the intrinsic matches below should be marked with the LLVM
87  // version that started autoupgrading them. At some point in the future we would
88  // like to use this information to remove upgrade code for some older
89  // intrinsics. It is currently undecided how we will determine that future
90  // point.
91  if (Name == "addcarryx.u32" || // Added in 8.0
92  Name == "addcarryx.u64" || // Added in 8.0
93  Name == "addcarry.u32" || // Added in 8.0
94  Name == "addcarry.u64" || // Added in 8.0
95  Name == "subborrow.u32" || // Added in 8.0
96  Name == "subborrow.u64" || // Added in 8.0
97  Name.startswith("sse2.padds.") || // Added in 8.0
98  Name.startswith("sse2.psubs.") || // Added in 8.0
99  Name.startswith("sse2.paddus.") || // Added in 8.0
100  Name.startswith("sse2.psubus.") || // Added in 8.0
101  Name.startswith("avx2.padds.") || // Added in 8.0
102  Name.startswith("avx2.psubs.") || // Added in 8.0
103  Name.startswith("avx2.paddus.") || // Added in 8.0
104  Name.startswith("avx2.psubus.") || // Added in 8.0
105  Name.startswith("avx512.padds.") || // Added in 8.0
106  Name.startswith("avx512.psubs.") || // Added in 8.0
107  Name.startswith("avx512.mask.padds.") || // Added in 8.0
108  Name.startswith("avx512.mask.psubs.") || // Added in 8.0
109  Name.startswith("avx512.mask.paddus.") || // Added in 8.0
110  Name.startswith("avx512.mask.psubus.") || // Added in 8.0
111  Name=="ssse3.pabs.b.128" || // Added in 6.0
112  Name=="ssse3.pabs.w.128" || // Added in 6.0
113  Name=="ssse3.pabs.d.128" || // Added in 6.0
114  Name.startswith("fma4.vfmadd.s") || // Added in 7.0
115  Name.startswith("fma.vfmadd.") || // Added in 7.0
116  Name.startswith("fma.vfmsub.") || // Added in 7.0
117  Name.startswith("fma.vfmsubadd.") || // Added in 7.0
118  Name.startswith("fma.vfnmadd.") || // Added in 7.0
119  Name.startswith("fma.vfnmsub.") || // Added in 7.0
120  Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
121  Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
122  Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
123  Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
124  Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
125  Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
126  Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
127  Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
128  Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
129  Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
130  Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
131  Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
132  Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
133  Name.startswith("avx512.kunpck") || // Added in 6.0
134  Name.startswith("avx2.pabs.") || // Added in 6.0
135  Name.startswith("avx512.mask.pabs.") || // Added in 6.0
136  Name.startswith("avx512.broadcastm") || // Added in 6.0
137  Name == "sse.sqrt.ss" || // Added in 7.0
138  Name == "sse2.sqrt.sd" || // Added in 7.0
139  Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
140  Name.startswith("avx.sqrt.p") || // Added in 7.0
141  Name.startswith("sse2.sqrt.p") || // Added in 7.0
142  Name.startswith("sse.sqrt.p") || // Added in 7.0
143  Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
144  Name.startswith("sse2.pcmpeq.") || // Added in 3.1
145  Name.startswith("sse2.pcmpgt.") || // Added in 3.1
146  Name.startswith("avx2.pcmpeq.") || // Added in 3.1
147  Name.startswith("avx2.pcmpgt.") || // Added in 3.1
148  Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
149  Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
150  Name.startswith("avx.vperm2f128.") || // Added in 6.0
151  Name == "avx2.vperm2i128" || // Added in 6.0
152  Name == "sse.add.ss" || // Added in 4.0
153  Name == "sse2.add.sd" || // Added in 4.0
154  Name == "sse.sub.ss" || // Added in 4.0
155  Name == "sse2.sub.sd" || // Added in 4.0
156  Name == "sse.mul.ss" || // Added in 4.0
157  Name == "sse2.mul.sd" || // Added in 4.0
158  Name == "sse.div.ss" || // Added in 4.0
159  Name == "sse2.div.sd" || // Added in 4.0
160  Name == "sse41.pmaxsb" || // Added in 3.9
161  Name == "sse2.pmaxs.w" || // Added in 3.9
162  Name == "sse41.pmaxsd" || // Added in 3.9
163  Name == "sse2.pmaxu.b" || // Added in 3.9
164  Name == "sse41.pmaxuw" || // Added in 3.9
165  Name == "sse41.pmaxud" || // Added in 3.9
166  Name == "sse41.pminsb" || // Added in 3.9
167  Name == "sse2.pmins.w" || // Added in 3.9
168  Name == "sse41.pminsd" || // Added in 3.9
169  Name == "sse2.pminu.b" || // Added in 3.9
170  Name == "sse41.pminuw" || // Added in 3.9
171  Name == "sse41.pminud" || // Added in 3.9
172  Name == "avx512.kand.w" || // Added in 7.0
173  Name == "avx512.kandn.w" || // Added in 7.0
174  Name == "avx512.knot.w" || // Added in 7.0
175  Name == "avx512.kor.w" || // Added in 7.0
176  Name == "avx512.kxor.w" || // Added in 7.0
177  Name == "avx512.kxnor.w" || // Added in 7.0
178  Name == "avx512.kortestc.w" || // Added in 7.0
179  Name == "avx512.kortestz.w" || // Added in 7.0
180  Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
181  Name.startswith("avx2.pmax") || // Added in 3.9
182  Name.startswith("avx2.pmin") || // Added in 3.9
183  Name.startswith("avx512.mask.pmax") || // Added in 4.0
184  Name.startswith("avx512.mask.pmin") || // Added in 4.0
185  Name.startswith("avx2.vbroadcast") || // Added in 3.8
186  Name.startswith("avx2.pbroadcast") || // Added in 3.8
187  Name.startswith("avx.vpermil.") || // Added in 3.1
188  Name.startswith("sse2.pshuf") || // Added in 3.9
189  Name.startswith("avx512.pbroadcast") || // Added in 3.9
190  Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
191  Name.startswith("avx512.mask.movddup") || // Added in 3.9
192  Name.startswith("avx512.mask.movshdup") || // Added in 3.9
193  Name.startswith("avx512.mask.movsldup") || // Added in 3.9
194  Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
195  Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
196  Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
197  Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
198  Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
199  Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
200  Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
201  Name.startswith("avx512.mask.punpckl") || // Added in 3.9
202  Name.startswith("avx512.mask.punpckh") || // Added in 3.9
203  Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
204  Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
205  Name.startswith("avx512.mask.pand.") || // Added in 3.9
206  Name.startswith("avx512.mask.pandn.") || // Added in 3.9
207  Name.startswith("avx512.mask.por.") || // Added in 3.9
208  Name.startswith("avx512.mask.pxor.") || // Added in 3.9
209  Name.startswith("avx512.mask.and.") || // Added in 3.9
210  Name.startswith("avx512.mask.andn.") || // Added in 3.9
211  Name.startswith("avx512.mask.or.") || // Added in 3.9
212  Name.startswith("avx512.mask.xor.") || // Added in 3.9
213  Name.startswith("avx512.mask.padd.") || // Added in 4.0
214  Name.startswith("avx512.mask.psub.") || // Added in 4.0
215  Name.startswith("avx512.mask.pmull.") || // Added in 4.0
216  Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
217  Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
218  Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
219  Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
220  Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
221  Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
222  Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
223  Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
224  Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
225  Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
226  Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
227  Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
228  Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
229  Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
230  Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
231  Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
232  Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
233  Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
234  Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
235  Name == "avx512.cvtusi2sd" || // Added in 7.0
236  Name.startswith("avx512.mask.permvar.") || // Added in 7.0
237  Name == "sse2.pmulu.dq" || // Added in 7.0
238  Name == "sse41.pmuldq" || // Added in 7.0
239  Name == "avx2.pmulu.dq" || // Added in 7.0
240  Name == "avx2.pmul.dq" || // Added in 7.0
241  Name == "avx512.pmulu.dq.512" || // Added in 7.0
242  Name == "avx512.pmul.dq.512" || // Added in 7.0
243  Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
244  Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
245  Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
246  Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
247  Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
248  Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
249  Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
250  Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
251  Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
252  Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
253  Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
254  Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
255  Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
256  Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
257  Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
258  Name.startswith("avx512.cmp.p") || // Added in 12.0
259  Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
260  Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
261  Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
262  Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
263  Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
264  Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
265  Name.startswith("avx512.mask.psll.d") || // Added in 4.0
266  Name.startswith("avx512.mask.psll.q") || // Added in 4.0
267  Name.startswith("avx512.mask.psll.w") || // Added in 4.0
268  Name.startswith("avx512.mask.psra.d") || // Added in 4.0
269  Name.startswith("avx512.mask.psra.q") || // Added in 4.0
270  Name.startswith("avx512.mask.psra.w") || // Added in 4.0
271  Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
272  Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
273  Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
274  Name.startswith("avx512.mask.pslli") || // Added in 4.0
275  Name.startswith("avx512.mask.psrai") || // Added in 4.0
276  Name.startswith("avx512.mask.psrli") || // Added in 4.0
277  Name.startswith("avx512.mask.psllv") || // Added in 4.0
278  Name.startswith("avx512.mask.psrav") || // Added in 4.0
279  Name.startswith("avx512.mask.psrlv") || // Added in 4.0
280  Name.startswith("sse41.pmovsx") || // Added in 3.8
281  Name.startswith("sse41.pmovzx") || // Added in 3.9
282  Name.startswith("avx2.pmovsx") || // Added in 3.9
283  Name.startswith("avx2.pmovzx") || // Added in 3.9
284  Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
285  Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
286  Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
287  Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
288  Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
289  Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
290  Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
291  Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
292  Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
293  Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
294  Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
295  Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
296  Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
297  Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
298  Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
299  Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
300  Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
301  Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
302  Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
303  Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
304  Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
305  Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
306  Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
307  Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
308  Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
309  Name.startswith("avx512.vpshld.") || // Added in 8.0
310  Name.startswith("avx512.vpshrd.") || // Added in 8.0
311  Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
312  Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
313  Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
314  Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
315  Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
316  Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
317  Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
318  Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
319  Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
320  Name.startswith("avx512.mask.conflict.") || // Added in 9.0
321  Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
322  Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
323  Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
324  Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
325  Name == "sse.cvtsi2ss" || // Added in 7.0
326  Name == "sse.cvtsi642ss" || // Added in 7.0
327  Name == "sse2.cvtsi2sd" || // Added in 7.0
328  Name == "sse2.cvtsi642sd" || // Added in 7.0
329  Name == "sse2.cvtss2sd" || // Added in 7.0
330  Name == "sse2.cvtdq2pd" || // Added in 3.9
331  Name == "sse2.cvtdq2ps" || // Added in 7.0
332  Name == "sse2.cvtps2pd" || // Added in 3.9
333  Name == "avx.cvtdq2.pd.256" || // Added in 3.9
334  Name == "avx.cvtdq2.ps.256" || // Added in 7.0
335  Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
336  Name.startswith("vcvtph2ps.") || // Added in 11.0
337  Name.startswith("avx.vinsertf128.") || // Added in 3.7
338  Name == "avx2.vinserti128" || // Added in 3.7
339  Name.startswith("avx512.mask.insert") || // Added in 4.0
340  Name.startswith("avx.vextractf128.") || // Added in 3.7
341  Name == "avx2.vextracti128" || // Added in 3.7
342  Name.startswith("avx512.mask.vextract") || // Added in 4.0
343  Name.startswith("sse4a.movnt.") || // Added in 3.9
344  Name.startswith("avx.movnt.") || // Added in 3.2
345  Name.startswith("avx512.storent.") || // Added in 3.9
346  Name == "sse41.movntdqa" || // Added in 5.0
347  Name == "avx2.movntdqa" || // Added in 5.0
348  Name == "avx512.movntdqa" || // Added in 5.0
349  Name == "sse2.storel.dq" || // Added in 3.9
350  Name.startswith("sse.storeu.") || // Added in 3.9
351  Name.startswith("sse2.storeu.") || // Added in 3.9
352  Name.startswith("avx.storeu.") || // Added in 3.9
353  Name.startswith("avx512.mask.storeu.") || // Added in 3.9
354  Name.startswith("avx512.mask.store.p") || // Added in 3.9
355  Name.startswith("avx512.mask.store.b.") || // Added in 3.9
356  Name.startswith("avx512.mask.store.w.") || // Added in 3.9
357  Name.startswith("avx512.mask.store.d.") || // Added in 3.9
358  Name.startswith("avx512.mask.store.q.") || // Added in 3.9
359  Name == "avx512.mask.store.ss" || // Added in 7.0
360  Name.startswith("avx512.mask.loadu.") || // Added in 3.9
361  Name.startswith("avx512.mask.load.") || // Added in 3.9
362  Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
363  Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
364  Name.startswith("avx512.mask.expand.b") || // Added in 9.0
365  Name.startswith("avx512.mask.expand.w") || // Added in 9.0
366  Name.startswith("avx512.mask.expand.d") || // Added in 9.0
367  Name.startswith("avx512.mask.expand.q") || // Added in 9.0
368  Name.startswith("avx512.mask.expand.p") || // Added in 9.0
369  Name.startswith("avx512.mask.compress.b") || // Added in 9.0
370  Name.startswith("avx512.mask.compress.w") || // Added in 9.0
371  Name.startswith("avx512.mask.compress.d") || // Added in 9.0
372  Name.startswith("avx512.mask.compress.q") || // Added in 9.0
373  Name.startswith("avx512.mask.compress.p") || // Added in 9.0
374  Name == "sse42.crc32.64.8" || // Added in 3.4
375  Name.startswith("avx.vbroadcast.s") || // Added in 3.5
376  Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
377  Name.startswith("avx512.mask.palignr.") || // Added in 3.9
378  Name.startswith("avx512.mask.valign.") || // Added in 4.0
379  Name.startswith("sse2.psll.dq") || // Added in 3.7
380  Name.startswith("sse2.psrl.dq") || // Added in 3.7
381  Name.startswith("avx2.psll.dq") || // Added in 3.7
382  Name.startswith("avx2.psrl.dq") || // Added in 3.7
383  Name.startswith("avx512.psll.dq") || // Added in 3.9
384  Name.startswith("avx512.psrl.dq") || // Added in 3.9
385  Name == "sse41.pblendw" || // Added in 3.7
386  Name.startswith("sse41.blendp") || // Added in 3.7
387  Name.startswith("avx.blend.p") || // Added in 3.7
388  Name == "avx2.pblendw" || // Added in 3.7
389  Name.startswith("avx2.pblendd.") || // Added in 3.7
390  Name.startswith("avx.vbroadcastf128") || // Added in 4.0
391  Name == "avx2.vbroadcasti128" || // Added in 3.7
392  Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
393  Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
394  Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
395  Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
396  Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
397  Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
398  Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
399  Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
400  Name == "xop.vpcmov" || // Added in 3.8
401  Name == "xop.vpcmov.256" || // Added in 5.0
402  Name.startswith("avx512.mask.move.s") || // Added in 4.0
403  Name.startswith("avx512.cvtmask2") || // Added in 5.0
404  Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
405  Name.startswith("xop.vprot") || // Added in 8.0
406  Name.startswith("avx512.prol") || // Added in 8.0
407  Name.startswith("avx512.pror") || // Added in 8.0
408  Name.startswith("avx512.mask.prorv.") || // Added in 8.0
409  Name.startswith("avx512.mask.pror.") || // Added in 8.0
410  Name.startswith("avx512.mask.prolv.") || // Added in 8.0
411  Name.startswith("avx512.mask.prol.") || // Added in 8.0
412  Name.startswith("avx512.ptestm") || // Added in 6.0
413  Name.startswith("avx512.ptestnm") || // Added in 6.0
414  Name.startswith("avx512.mask.pavg")) // Added in 6.0
415  return true;
416 
417  return false;
418 }
419 
// NOTE(review): the listing jumps from line 419 to 421, so the line carrying
// this function's name and leading parameters is missing; the name is not
// visible here (presumably UpgradeX86IntrinsicFunction -- confirm upstream).
// The body uses F, Name and NewFn.
//
// Decides whether the declaration F, identified by Name, needs upgrading.
// Returns false when Name does not start with "x86." or when none of the cases
// below applies. When a case applies it returns true; in the rename cases F is
// moved aside with rename() and NewFn is set to the replacement declaration.
421  Function *&NewFn) {
422  // Only handle intrinsics that start with "x86.".
423  if (!Name.startswith("x86."))
424  return false;
425  // Remove "x86." prefix.
426  Name = Name.substr(4);
427 
 // NOTE(review): listing line 428 is missing. The closing brace at 431 implies
 // that an `if (...) {` was opened there; under that condition NewFn is set to
 // nullptr and true is returned without renaming F.
429  NewFn = nullptr;
430  return true;
431  }
432 
433  if (Name == "rdtscp") { // Added in 8.0
434  // If this intrinsic has 0 operands, it's the new version.
435  if (F->getFunctionType()->getNumParams() == 0)
436  return false;
437 
438  rename(F);
439  NewFn = Intrinsic::getDeclaration(F->getParent(),
440  Intrinsic::x86_rdtscp);
441  return true;
442  }
443 
444  // SSE4.1 ptest functions may have an old signature.
445  if (Name.startswith("sse41.ptest")) { // Added in 3.2
 // substr(11) is whatever follows "sse41.ptest"; any suffix other than
 // "c", "z" or "nzc" falls through to the checks below.
446  if (Name.substr(11) == "c")
447  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
448  if (Name.substr(11) == "z")
449  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
450  if (Name.substr(11) == "nzc")
451  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
452  }
453  // Several blend and other instructions with masks used the wrong number of
454  // bits.
455  if (Name == "sse41.insertps") // Added in 3.6
456  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
457  NewFn);
458  if (Name == "sse41.dppd") // Added in 3.6
459  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
460  NewFn);
461  if (Name == "sse41.dpps") // Added in 3.6
462  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
463  NewFn);
464  if (Name == "sse41.mpsadbw") // Added in 3.6
465  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
466  NewFn);
467  if (Name == "avx.dp.ps.256") // Added in 3.6
468  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
469  NewFn);
470  if (Name == "avx2.mpsadbw") // Added in 3.6
471  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
472  NewFn);
473  if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
474  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
475  NewFn);
476  if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
477  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
478  NewFn);
479  if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
480  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
481  NewFn);
482  if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
483  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
484  NewFn);
485  if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
486  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
487  NewFn);
488  if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
489  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
490  NewFn);
491 
492  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
 // Only the two-argument form is treated as old.
493  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
494  rename(F);
495  NewFn = Intrinsic::getDeclaration(F->getParent(),
496  Intrinsic::x86_xop_vfrcz_ss);
497  return true;
498  }
499  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
500  rename(F);
501  NewFn = Intrinsic::getDeclaration(F->getParent(),
502  Intrinsic::x86_xop_vfrcz_sd);
503  return true;
504  }
505  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
506  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
 // Parameter 2 is the index operand; a non-FP index type falls through.
507  auto Idx = F->getFunctionType()->getParamType(2);
508  if (Idx->isFPOrFPVectorTy()) {
509  rename(F);
 // The replacement is chosen from the index type's total and per-element
 // bit widths; any combination not matched explicitly selects the
 // ps_256 variant.
510  unsigned IdxSize = Idx->getPrimitiveSizeInBits();
511  unsigned EltSize = Idx->getScalarSizeInBits();
512  Intrinsic::ID Permil2ID;
513  if (EltSize == 64 && IdxSize == 128)
514  Permil2ID = Intrinsic::x86_xop_vpermil2pd;
515  else if (EltSize == 32 && IdxSize == 128)
516  Permil2ID = Intrinsic::x86_xop_vpermil2ps;
517  else if (EltSize == 64 && IdxSize == 256)
518  Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
519  else
520  Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
521  NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
522  return true;
523  }
524  }
525 
 // Unlike the cases above, F is not renamed before NewFn is assigned here.
526  if (Name == "seh.recoverfp") {
527  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
528  return true;
529  }
530 
531  return false;
532 }
533 
535  assert(F && "Illegal to upgrade a non-existent Function.");
536 
537  // Quickly eliminate it, if it's not a candidate.
538  StringRef Name = F->getName();
539  if (Name.size() <= 8 || !Name.startswith("llvm."))
540  return false;
541  Name = Name.substr(5); // Strip off "llvm."
542 
543  switch (Name[0]) {
544  default: break;
545  case 'a': {
546  if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
547  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
548  F->arg_begin()->getType());
549  return true;
550  }
551  if (Name.startswith("aarch64.neon.frintn")) {
552  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
553  F->arg_begin()->getType());
554  return true;
555  }
556  if (Name.startswith("aarch64.neon.rbit")) {
557  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
558  F->arg_begin()->getType());
559  return true;
560  }
561  if (Name.startswith("arm.neon.vclz")) {
562  Type* args[2] = {
563  F->arg_begin()->getType(),
564  Type::getInt1Ty(F->getContext())
565  };
566  // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
567  // the end of the name. Change name from llvm.arm.neon.vclz.* to
568  // llvm.ctlz.*
569  FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
570  NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
571  "llvm.ctlz." + Name.substr(14), F->getParent());
572  return true;
573  }
574  if (Name.startswith("arm.neon.vcnt")) {
575  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
576  F->arg_begin()->getType());
577  return true;
578  }
579  static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
580  if (vldRegex.match(Name)) {
581  auto fArgs = F->getFunctionType()->params();
582  SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
583  // Can't use Intrinsic::getDeclaration here as the return types might
584  // then only be structurally equal.
585  FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
586  StringRef Suffix =
587  F->getContext().supportsTypedPointers() ? "p0i8" : "p0";
588  NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
589  "llvm." + Name + "." + Suffix, F->getParent());
590  return true;
591  }
592  static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
593  if (vstRegex.match(Name)) {
594  static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
595  Intrinsic::arm_neon_vst2,
596  Intrinsic::arm_neon_vst3,
597  Intrinsic::arm_neon_vst4};
598 
599  static const Intrinsic::ID StoreLaneInts[] = {
600  Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
601  Intrinsic::arm_neon_vst4lane
602  };
603 
604  auto fArgs = F->getFunctionType()->params();
605  Type *Tys[] = {fArgs[0], fArgs[1]};
606  if (!Name.contains("lane"))
607  NewFn = Intrinsic::getDeclaration(F->getParent(),
608  StoreInts[fArgs.size() - 3], Tys);
609  else
610  NewFn = Intrinsic::getDeclaration(F->getParent(),
611  StoreLaneInts[fArgs.size() - 5], Tys);
612  return true;
613  }
614  if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
615  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
616  return true;
617  }
618  if (Name.startswith("arm.neon.vqadds.")) {
619  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
620  F->arg_begin()->getType());
621  return true;
622  }
623  if (Name.startswith("arm.neon.vqaddu.")) {
624  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
625  F->arg_begin()->getType());
626  return true;
627  }
628  if (Name.startswith("arm.neon.vqsubs.")) {
629  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
630  F->arg_begin()->getType());
631  return true;
632  }
633  if (Name.startswith("arm.neon.vqsubu.")) {
634  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
635  F->arg_begin()->getType());
636  return true;
637  }
638  if (Name.startswith("aarch64.neon.addp")) {
639  if (F->arg_size() != 2)
640  break; // Invalid IR.
641  VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
642  if (Ty && Ty->getElementType()->isFloatingPointTy()) {
643  NewFn = Intrinsic::getDeclaration(F->getParent(),
644  Intrinsic::aarch64_neon_faddp, Ty);
645  return true;
646  }
647  }
648 
649  // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
650  // respectively
651  if ((Name.startswith("arm.neon.bfdot.") ||
652  Name.startswith("aarch64.neon.bfdot.")) &&
653  Name.endswith("i8")) {
654  Intrinsic::ID IID =
656  .Cases("arm.neon.bfdot.v2f32.v8i8",
657  "arm.neon.bfdot.v4f32.v16i8",
658  Intrinsic::arm_neon_bfdot)
659  .Cases("aarch64.neon.bfdot.v2f32.v8i8",
660  "aarch64.neon.bfdot.v4f32.v16i8",
661  Intrinsic::aarch64_neon_bfdot)
663  if (IID == Intrinsic::not_intrinsic)
664  break;
665 
666  size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
667  assert((OperandWidth == 64 || OperandWidth == 128) &&
668  "Unexpected operand width");
669  LLVMContext &Ctx = F->getParent()->getContext();
670  std::array<Type *, 2> Tys {{
671  F->getReturnType(),
672  FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
673  }};
674  NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
675  return true;
676  }
677 
678  // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
679  // and accept v8bf16 instead of v16i8
680  if ((Name.startswith("arm.neon.bfm") ||
681  Name.startswith("aarch64.neon.bfm")) &&
682  Name.endswith(".v4f32.v16i8")) {
683  Intrinsic::ID IID =
685  .Case("arm.neon.bfmmla.v4f32.v16i8",
686  Intrinsic::arm_neon_bfmmla)
687  .Case("arm.neon.bfmlalb.v4f32.v16i8",
688  Intrinsic::arm_neon_bfmlalb)
689  .Case("arm.neon.bfmlalt.v4f32.v16i8",
690  Intrinsic::arm_neon_bfmlalt)
691  .Case("aarch64.neon.bfmmla.v4f32.v16i8",
692  Intrinsic::aarch64_neon_bfmmla)
693  .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
694  Intrinsic::aarch64_neon_bfmlalb)
695  .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
696  Intrinsic::aarch64_neon_bfmlalt)
698  if (IID == Intrinsic::not_intrinsic)
699  break;
700 
701  std::array<Type *, 0> Tys;
702  NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
703  return true;
704  }
705 
706  if (Name == "arm.mve.vctp64" &&
707  cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
708  // A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
709  // function and deal with it below in UpgradeIntrinsicCall.
710  rename(F);
711  return true;
712  }
713  // These too are changed to accept a v2i1 instead of the old v4i1.
714  if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
715  Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
716  Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
717  Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
718  Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
719  Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
720  Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
721  Name == "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
722  Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
723  Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
724  Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
725  Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
726  Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
727  Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
728  return true;
729 
730  break;
731  }
732 
733  case 'c': {
734  if (Name.startswith("ctlz.") && F->arg_size() == 1) {
735  rename(F);
736  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
737  F->arg_begin()->getType());
738  return true;
739  }
740  if (Name.startswith("cttz.") && F->arg_size() == 1) {
741  rename(F);
742  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
743  F->arg_begin()->getType());
744  return true;
745  }
746  break;
747  }
748  case 'd': {
749  if (Name == "dbg.value" && F->arg_size() == 4) {
750  rename(F);
751  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
752  return true;
753  }
754  break;
755  }
756  case 'e': {
758  static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
759  if (R.match(Name, &Groups)) {
762  .Case("add", Intrinsic::vector_reduce_add)
763  .Case("mul", Intrinsic::vector_reduce_mul)
764  .Case("and", Intrinsic::vector_reduce_and)
765  .Case("or", Intrinsic::vector_reduce_or)
766  .Case("xor", Intrinsic::vector_reduce_xor)
767  .Case("smax", Intrinsic::vector_reduce_smax)
768  .Case("smin", Intrinsic::vector_reduce_smin)
769  .Case("umax", Intrinsic::vector_reduce_umax)
770  .Case("umin", Intrinsic::vector_reduce_umin)
771  .Case("fmax", Intrinsic::vector_reduce_fmax)
772  .Case("fmin", Intrinsic::vector_reduce_fmin)
774  if (ID != Intrinsic::not_intrinsic) {
775  rename(F);
776  auto Args = F->getFunctionType()->params();
777  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
778  return true;
779  }
780  }
781  static const Regex R2(
782  "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
783  Groups.clear();
784  if (R2.match(Name, &Groups)) {
786  if (Groups[1] == "fadd")
787  ID = Intrinsic::vector_reduce_fadd;
788  if (Groups[1] == "fmul")
789  ID = Intrinsic::vector_reduce_fmul;
790  if (ID != Intrinsic::not_intrinsic) {
791  rename(F);
792  auto Args = F->getFunctionType()->params();
793  Type *Tys[] = {Args[1]};
794  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
795  return true;
796  }
797  }
798  break;
799  }
800  case 'i':
801  case 'l': {
802  bool IsLifetimeStart = Name.startswith("lifetime.start");
803  if (IsLifetimeStart || Name.startswith("invariant.start")) {
804  Intrinsic::ID ID = IsLifetimeStart ?
805  Intrinsic::lifetime_start : Intrinsic::invariant_start;
806  auto Args = F->getFunctionType()->params();
807  Type* ObjectPtr[1] = {Args[1]};
808  if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
809  rename(F);
810  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
811  return true;
812  }
813  }
814 
815  bool IsLifetimeEnd = Name.startswith("lifetime.end");
816  if (IsLifetimeEnd || Name.startswith("invariant.end")) {
817  Intrinsic::ID ID = IsLifetimeEnd ?
818  Intrinsic::lifetime_end : Intrinsic::invariant_end;
819 
820  auto Args = F->getFunctionType()->params();
821  Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
822  if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
823  rename(F);
824  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
825  return true;
826  }
827  }
828  if (Name.startswith("invariant.group.barrier")) {
829  // Rename invariant.group.barrier to launder.invariant.group
830  auto Args = F->getFunctionType()->params();
831  Type* ObjectPtr[1] = {Args[0]};
832  rename(F);
833  NewFn = Intrinsic::getDeclaration(F->getParent(),
834  Intrinsic::launder_invariant_group, ObjectPtr);
835  return true;
836 
837  }
838 
839  break;
840  }
841  case 'm': {
842  if (Name.startswith("masked.load.")) {
843  Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
844  if (F->getName() !=
845  Intrinsic::getName(Intrinsic::masked_load, Tys, F->getParent())) {
846  rename(F);
847  NewFn = Intrinsic::getDeclaration(F->getParent(),
848  Intrinsic::masked_load,
849  Tys);
850  return true;
851  }
852  }
853  if (Name.startswith("masked.store.")) {
854  auto Args = F->getFunctionType()->params();
855  Type *Tys[] = { Args[0], Args[1] };
856  if (F->getName() !=
857  Intrinsic::getName(Intrinsic::masked_store, Tys, F->getParent())) {
858  rename(F);
859  NewFn = Intrinsic::getDeclaration(F->getParent(),
860  Intrinsic::masked_store,
861  Tys);
862  return true;
863  }
864  }
865  // Renaming gather/scatter intrinsics with no address space overloading
866  // to the new overload which includes an address space
867  if (Name.startswith("masked.gather.")) {
868  Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
869  if (F->getName() !=
870  Intrinsic::getName(Intrinsic::masked_gather, Tys, F->getParent())) {
871  rename(F);
872  NewFn = Intrinsic::getDeclaration(F->getParent(),
873  Intrinsic::masked_gather, Tys);
874  return true;
875  }
876  }
877  if (Name.startswith("masked.scatter.")) {
878  auto Args = F->getFunctionType()->params();
879  Type *Tys[] = {Args[0], Args[1]};
880  if (F->getName() !=
881  Intrinsic::getName(Intrinsic::masked_scatter, Tys, F->getParent())) {
882  rename(F);
883  NewFn = Intrinsic::getDeclaration(F->getParent(),
884  Intrinsic::masked_scatter, Tys);
885  return true;
886  }
887  }
888  // Updating the memory intrinsics (memcpy/memmove/memset) that have an
889  // alignment parameter to embedding the alignment as an attribute of
890  // the pointer args.
891  if (Name.startswith("memcpy.") && F->arg_size() == 5) {
892  rename(F);
893  // Get the types of dest, src, and len
894  ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
895  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
896  ParamTypes);
897  return true;
898  }
899  if (Name.startswith("memmove.") && F->arg_size() == 5) {
900  rename(F);
901  // Get the types of dest, src, and len
902  ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
903  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
904  ParamTypes);
905  return true;
906  }
907  if (Name.startswith("memset.") && F->arg_size() == 5) {
908  rename(F);
909  // Get the types of dest, and len
910  const auto *FT = F->getFunctionType();
911  Type *ParamTypes[2] = {
912  FT->getParamType(0), // Dest
913  FT->getParamType(2) // len
914  };
915  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
916  ParamTypes);
917  return true;
918  }
919  break;
920  }
921  case 'n': {
922  if (Name.startswith("nvvm.")) {
923  Name = Name.substr(5);
924 
925  // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
927  .Cases("brev32", "brev64", Intrinsic::bitreverse)
928  .Case("clz.i", Intrinsic::ctlz)
929  .Case("popc.i", Intrinsic::ctpop)
931  if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
932  NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
933  {F->getReturnType()});
934  return true;
935  }
936 
937  // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
938  // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
939  //
940  // TODO: We could add lohi.i2d.
941  bool Expand = StringSwitch<bool>(Name)
942  .Cases("abs.i", "abs.ll", true)
943  .Cases("clz.ll", "popc.ll", "h2f", true)
944  .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
945  .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
946  .StartsWith("atomic.load.add.f32.p", true)
947  .StartsWith("atomic.load.add.f64.p", true)
948  .Default(false);
949  if (Expand) {
950  NewFn = nullptr;
951  return true;
952  }
953  }
954  break;
955  }
956  case 'o':
957  // We only need to change the name to match the mangling including the
958  // address space.
959  if (Name.startswith("objectsize.")) {
960  Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
961  if (F->arg_size() == 2 || F->arg_size() == 3 ||
962  F->getName() !=
963  Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
964  rename(F);
965  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
966  Tys);
967  return true;
968  }
969  }
970  break;
971 
972  case 'p':
973  if (Name == "prefetch") {
974  // Handle address space overloading.
975  Type *Tys[] = {F->arg_begin()->getType()};
976  if (F->getName() !=
977  Intrinsic::getName(Intrinsic::prefetch, Tys, F->getParent())) {
978  rename(F);
979  NewFn =
980  Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
981  return true;
982  }
983  } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
984  rename(F);
985  NewFn = Intrinsic::getDeclaration(F->getParent(),
986  Intrinsic::ptr_annotation,
987  F->arg_begin()->getType());
988  return true;
989  }
990  break;
991 
992  case 's':
993  if (Name == "stackprotectorcheck") {
994  NewFn = nullptr;
995  return true;
996  }
997  break;
998 
999  case 'v': {
1000  if (Name == "var.annotation" && F->arg_size() == 4) {
1001  rename(F);
1002  NewFn = Intrinsic::getDeclaration(F->getParent(),
1003  Intrinsic::var_annotation);
1004  return true;
1005  }
1006  break;
1007  }
1008 
1009  case 'x':
1010  if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
1011  return true;
1012  }
1013  // Remangle our intrinsic since we upgrade the mangling
1015  if (Result != None) {
1016  NewFn = Result.getValue();
1017  return true;
1018  }
1019 
1020  // This may not belong here. This function is effectively being overloaded
1021  // to both detect an intrinsic which needs upgrading, and to provide the
1022  // upgraded form of the intrinsic. We should perhaps have two separate
1023  // functions for this.
1024  return false;
1025 }
1026 
// NOTE(review): the line that opens this function is absent from this listing;
// `F` and `NewFn` are used below as if they were its parameters — confirm.
// Reset the out-value, then let UpgradeIntrinsicFunction1 decide whether F
// needs upgrading; that call may store a replacement declaration in NewFn.
1028  NewFn = nullptr;
1029  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
// A replacement, when one is produced, must be a different Function object.
1030  assert(F != NewFn && "Intrinsic function upgraded to the same function");
1031 
1032  // Upgrade intrinsic attributes. This does not change the function.
// The attribute list is refreshed on the replacement when there is one,
// otherwise on F itself, and only when getIntrinsicID() is non-zero.
1033  if (NewFn)
1034  F = NewFn;
1035  if (Intrinsic::ID id = F->getIntrinsicID())
1036  F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
// The result is exactly what UpgradeIntrinsicFunction1 reported.
1037  return Upgraded;
1038 }
1039 
// NOTE(review): the line that opens this function is absent from this listing;
// `GV` is used below as if it were its parameter — confirm.
// Only a named global called llvm.global_ctors or llvm.global_dtors that has
// an initializer is considered; anything else yields nullptr.
1041  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1042  GV->getName() == "llvm.global_dtors")) ||
1043  !GV->hasInitializer())
1044  return nullptr;
// The value type must be an array of two-field structs; other shapes are
// left alone (nullptr).
1045  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1046  if (!ATy)
1047  return nullptr;
1048  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1049  if (!STy || STy->getNumElements() != 2)
1050  return nullptr;
1051 
// New element type: the two original field types followed by an i8 pointer.
1052  LLVMContext &C = GV->getContext();
1053  IRBuilder<> IRB(C);
1054  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1055  IRB.getInt8PtrTy());
// Rebuild every initializer entry with the widened struct type, copying the
// two existing fields.
1056  Constant *Init = GV->getInitializer();
1057  unsigned N = Init->getNumOperands();
1058  std::vector<Constant *> NewCtors(N);
1059  for (unsigned i = 0; i != N; ++i) {
1060  auto Ctor = cast<Constant>(Init->getOperand(i));
1061  NewCtors[i] = ConstantStruct::get(
1062  EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
// NOTE(review): the listing line that supplies the third field value and
// closes this call is missing here — confirm against the upstream file.
1064  }
1065  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1066 
// The replacement is a newly allocated GlobalVariable that reuses the old
// linkage and name; the `false` argument is passed through to the
// GlobalVariable constructor unchanged.
1067  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1068  NewInit, GV->getName());
1069 }
1070 
1071 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1072 // to byte shuffles.
1074  Value *Op, unsigned Shift) {
// NOTE(review): the first line of the signature is absent from this listing.
// Op is the vector being shifted and Shift the byte count applied within each
// 16-byte lane. The result has Op's original type.
1075  auto *ResultTy = cast<FixedVectorType>(Op->getType());
// Each element of Op is treated as 8 bytes wide, so this is the byte count.
1076  unsigned NumElts = ResultTy->getNumElements() * 8;
1077 
1078  // Bitcast from a 64-bit element type to a byte element type.
1079  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1080  Op = Builder.CreateBitCast(Op, VecTy, "cast");
1081 
1082  // We'll be shuffling in zeroes.
1083  Value *Res = Constant::getNullValue(VecTy);
1084 
1085  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1086  // we'll just return the zero vector.
1087  if (Shift < 16) {
// The index buffer has 64 entries, so NumElts is assumed to be at most 64.
1088  int Idxs[64];
1089  // 256/512-bit version is split into 2/4 16-byte lanes.
1090  for (unsigned l = 0; l != NumElts; l += 16)
1091  for (unsigned i = 0; i != 16; ++i) {
// The shuffle below takes (zero vector, Op): an index >= NumElts reads from
// Op, a smaller one reads a zero byte. Bytes with i < Shift fall below
// NumElts and are rebased so that, after adding the lane offset, they still
// address the zero vector.
1092  unsigned Idx = NumElts + i - Shift;
1093  if (Idx < NumElts)
1094  Idx -= NumElts - 16; // end of lane, switch operand.
1095  Idxs[l + i] = Idx + l;
1096  }
1097 
1098  Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
1099  }
1100 
1101  // Bitcast back to a 64-bit element type.
1102  return Builder.CreateBitCast(Res, ResultTy, "cast");
1103 }
1104 
1105 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1106 // to byte shuffles.
1108  unsigned Shift) {
// NOTE(review): the first line of the signature is absent from this listing.
// Op is the vector being shifted and Shift the byte count applied within each
// 16-byte lane. The result has Op's original type.
1109  auto *ResultTy = cast<FixedVectorType>(Op->getType());
// Each element of Op is treated as 8 bytes wide, so this is the byte count.
1110  unsigned NumElts = ResultTy->getNumElements() * 8;
1111 
1112  // Bitcast from a 64-bit element type to a byte element type.
1113  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1114  Op = Builder.CreateBitCast(Op, VecTy, "cast");
1115 
1116  // We'll be shuffling in zeroes.
1117  Value *Res = Constant::getNullValue(VecTy);
1118 
1119  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1120  // we'll just return the zero vector.
1121  if (Shift < 16) {
// The index buffer has 64 entries, so NumElts is assumed to be at most 64.
1122  int Idxs[64];
1123  // 256/512-bit version is split into 2/4 16-byte lanes.
1124  for (unsigned l = 0; l != NumElts; l += 16)
1125  for (unsigned i = 0; i != 16; ++i) {
// The shuffle below takes (Op, zero vector): an index < NumElts reads from
// Op, a larger one reads a zero byte. Positions that would run past the end
// of the 16-byte lane are pushed into the zero vector's index range.
1126  unsigned Idx = i + Shift;
1127  if (Idx >= 16)
1128  Idx += NumElts - 16; // end of lane, switch operand.
1129  Idxs[l + i] = Idx + l;
1130  }
1131 
1132  Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
1133  }
1134 
1135  // Bitcast back to a 64-bit element type.
1136  return Builder.CreateBitCast(Res, ResultTy, "cast");
1137 }
1138 
1140  unsigned NumElts) {
// NOTE(review): the first signature line is absent from this listing; callers
// in this file pass (Builder, Mask, NumElts).
// Turns an integer mask into a vector of i1 with NumElts elements.
1141  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
// NOTE(review): the line declaring MaskTy is missing from this listing; the
// visible arguments are the i1 type and the bit width of the integer mask.
1143  Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1144  Mask = Builder.CreateBitCast(Mask, MaskTy);
1145 
1146  // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1147  // i8 and we need to extract down to the right number of elements.
1148  if (NumElts <= 4) {
// Identity indices 0..NumElts-1 keep only the low NumElts mask bits.
1149  int Indices[4];
1150  for (unsigned i = 0; i != NumElts; ++i)
1151  Indices[i] = i;
1152  Mask = Builder.CreateShuffleVector(
1153  Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
1154  }
1155 
1156  return Mask;
1157 }
1158 
1160  Value *Op0, Value *Op1) {
// NOTE(review): the first signature line is absent from this listing; callers
// in this file pass (Builder, Mask, Op0, Op1).
// Emits a select between Op0 and Op1 controlled by Mask.
1161  // If the mask is all ones just emit the first operation.
1162  if (const auto *C = dyn_cast<Constant>(Mask))
1163  if (C->isAllOnesValue())
1164  return Op0;
1165 
// NOTE(review): the line that starts this statement is missing from the
// listing; the visible argument is Op0's element count, and Mask is then used
// as the select condition — presumably it is converted to a vector of i1
// first. Confirm against the upstream file.
1167  cast<FixedVectorType>(Op0->getType())->getNumElements());
1168  return Builder.CreateSelect(Mask, Op0, Op1);
1169 }
1170 
1172  Value *Op0, Value *Op1) {
// NOTE(review): the first signature line is absent from this listing; the
// body uses `Builder` and `Mask` in addition to the parameters shown.
// Emits a select between Op0 and Op1 controlled by bit 0 of an integer Mask.
1173  // If the mask is all ones just emit the first operation.
1174  if (const auto *C = dyn_cast<Constant>(Mask))
1175  if (C->isAllOnesValue())
1176  return Op0;
1177 
// Reinterpret the integer mask as a vector of i1 (one element per mask bit)
// and use element 0 as the scalar condition.
1178  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1179  Mask->getType()->getIntegerBitWidth());
1180  Mask = Builder.CreateBitCast(Mask, MaskTy);
1181  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1182  return Builder.CreateSelect(Mask, Op0, Op1);
1183 }
1184 
1185 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1186 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1187 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1189  Value *Op1, Value *Shift,
1190  Value *Passthru, Value *Mask,
1191  bool IsVALIGN) {
// NOTE(review): the first signature line is absent from this listing; the
// body also uses `Builder` and `Op0`.
// Shift must be a ConstantInt (the cast below requires it).
1192  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1193 
1194  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1195  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1196  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1197  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1198 
1199  // Mask the immediate for VALIGN.
1200  if (IsVALIGN)
1201  ShiftVal &= (NumElts - 1);
1202 
1203  // If palignr is shifting the pair of vectors more than the size of two
1204  // lanes, emit zero.
1205  if (ShiftVal >= 32)
1206  return llvm::Constant::getNullValue(Op0->getType());
1207 
1208  // If palignr is shifting the pair of input vectors more than one lane,
1209  // but less than two lanes, convert to shifting in zeroes.
1210  if (ShiftVal > 16) {
1211  ShiftVal -= 16;
1212  Op1 = Op0;
1213  Op0 = llvm::Constant::getNullValue(Op0->getType());
1214  }
1215 
// The index buffer has 64 entries; the inner loop always fills 16 per lane,
// but only the first NumElts entries are handed to the shuffle.
1216  int Indices[64];
1217  // 256-bit palignr operates on 128-bit lanes so we need to handle that
1218  for (unsigned l = 0; l < NumElts; l += 16) {
1219  for (unsigned i = 0; i != 16; ++i) {
// The shuffle below takes (Op1, Op0): an index < NumElts reads Op1, a larger
// one reads Op0. For PALIGNR an index that leaves the 16-element lane is
// rebased into the same lane of Op0; for VALIGN the raw index is kept.
1220  unsigned Idx = ShiftVal + i;
1221  if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1222  Idx += NumElts - 16; // End of lane, switch operand.
1223  Indices[l + i] = Idx + l;
1224  }
1225  }
1226 
1227  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
1228  makeArrayRef(Indices, NumElts),
1229  "palignr");
1230 
// Apply the write mask: masked-off elements take their value from Passthru.
1231  return EmitX86Select(Builder, Mask, Align, Passthru);
1232 }
1233 
1235  bool ZeroMask, bool IndexForm) {
// NOTE(review): the first signature line is absent from this listing; the
// body also uses `Builder` and `CI`.
// Replaces a masked call with the matching x86_avx512_vpermi2var_* intrinsic
// followed by a select on the call's mask operand (argument 3).
1236  Type *Ty = CI.getType();
1237  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1238  unsigned EltWidth = Ty->getScalarSizeInBits();
1239  bool IsFloat = Ty->isFPOrFPVectorTy();
// The intrinsic is chosen from the result type: total width (128/256/512),
// element width, and for 32/64-bit elements whether they are floating point.
// 16-bit and 8-bit elements do not consult IsFloat.
1240  Intrinsic::ID IID;
1241  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1242  IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1243  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1244  IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1245  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1246  IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1247  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1248  IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1249  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1250  IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1251  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1252  IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1253  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1254  IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1255  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1256  IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1257  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1258  IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1259  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1260  IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1261  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1262  IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1263  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1264  IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1265  else if (VecWidth == 128 && EltWidth == 16)
1266  IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1267  else if (VecWidth == 256 && EltWidth == 16)
1268  IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1269  else if (VecWidth == 512 && EltWidth == 16)
1270  IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1271  else if (VecWidth == 128 && EltWidth == 8)
1272  IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1273  else if (VecWidth == 256 && EltWidth == 8)
1274  IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1275  else if (VecWidth == 512 && EltWidth == 8)
1276  IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1277  else
1278  llvm_unreachable("Unexpected intrinsic");
1279 
1280  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1281  CI.getArgOperand(2) };
1282 
1283  // If this isn't index form we need to swap operand 0 and 1.
1284  if (!IndexForm)
1285  std::swap(Args[0], Args[1]);
1286 
1287  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1288  Args);
// Masked-off elements become zero when ZeroMask is set; otherwise they come
// from the call's original argument 1 (before any swap), bitcast to Ty.
1289  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1290  : Builder.CreateBitCast(CI.getArgOperand(1),
1291  Ty);
1292  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1293 }
1294 
1296  Intrinsic::ID IID) {
// NOTE(review): the first signature line is absent from this listing; the
// body also uses `Builder` and `CI`.
// Replaces a two-operand call with intrinsic IID overloaded on the call's
// result type, applied to operands 0 and 1.
1297  Type *Ty = CI.getType();
1298  Value *Op0 = CI.getOperand(0);
1299  Value *Op1 = CI.getOperand(1);
1300  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1301  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1302 
// With four arguments, operand 2 is the pass-through value and operand 3 the
// mask that selects between it and the new result.
1303  if (CI.arg_size() == 4) { // For masked intrinsics.
1304  Value *VecSrc = CI.getOperand(2);
1305  Value *Mask = CI.getOperand(3);
1306  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1307  }
1308  return Res;
1309 }
1310 
1312  bool IsRotateRight) {
// NOTE(review): the first signature line is absent from this listing; the
// body also uses `Builder` and `CI`.
// Expresses a rotate as a funnel shift (fshl/fshr) whose two data inputs are
// the same value, argument 0. Argument 1 is the rotate amount.
1313  Type *Ty = CI.getType();
1314  Value *Src = CI.getArgOperand(0);
1315  Value *Amt = CI.getArgOperand(1);
1316 
1317  // Amount may be scalar immediate, in which case create a splat vector.
1318  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
1319  // we only care about the lowest log2 bits anyway.
1320  if (Amt->getType() != Ty) {
1321  unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// The amount is cast as unsigned (the `false` flag) to the element type.
1322  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1323  Amt = Builder.CreateVectorSplat(NumElts, Amt);
1324  }
1325 
1326  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1327  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1328  Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1329 
// With four arguments, operand 2 is the pass-through value and operand 3 the
// mask that selects between it and the new result.
1330  if (CI.arg_size() == 4) { // For masked intrinsics.
1331  Value *VecSrc = CI.getOperand(2);
1332  Value *Mask = CI.getOperand(3);
1333  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1334  }
1335  return Res;
1336 }
1337 
1338 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1339  bool IsSigned) {
1340  Type *Ty = CI.getType();
1341  Value *LHS = CI.getArgOperand(0);
1342  Value *RHS = CI.getArgOperand(1);
1343 
1344  CmpInst::Predicate Pred;
1345  switch (Imm) {
1346  case 0x0:
1347  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1348  break;
1349  case 0x1:
1350  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1351  break;
1352  case 0x2:
1353  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1354  break;
1355  case 0x3:
1356  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1357  break;
1358  case 0x4:
1359  Pred = ICmpInst::ICMP_EQ;
1360  break;
1361  case 0x5:
1362  Pred = ICmpInst::ICMP_NE;
1363  break;
1364  case 0x6:
1365  return Constant::getNullValue(Ty); // FALSE
1366  case 0x7:
1367  return Constant::getAllOnesValue(Ty); // TRUE
1368  default:
1369  llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1370  }
1371 
1372  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1373  Value *Ext = Builder.CreateSExt(Cmp, Ty);
1374  return Ext;
1375 }
1376 
1378  bool IsShiftRight, bool ZeroMask) {
// NOTE(review): the first signature line is absent from this listing; the
// body also uses `Builder` and `CI`.
// Expresses a concatenating shift as a funnel shift (fshl/fshr) of arguments
// 0 and 1 by argument 2.
1379  Type *Ty = CI.getType();
1380  Value *Op0 = CI.getArgOperand(0);
1381  Value *Op1 = CI.getArgOperand(1);
1382  Value *Amt = CI.getArgOperand(2);
1383 
// The right-shift form passes its two data operands in the opposite order.
1384  if (IsShiftRight)
1385  std::swap(Op0, Op1);
1386 
1387  // Amount may be scalar immediate, in which case create a splat vector.
1388  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
1389  // we only care about the lowest log2 bits anyway.
1390  if (Amt->getType() != Ty) {
1391  unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// The amount is cast as unsigned (the `false` flag) to the element type.
1392  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1393  Amt = Builder.CreateVectorSplat(NumElts, Amt);
1394  }
1395 
1396  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1397  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1398  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1399 
// The mask is always the last operand. The pass-through value is argument 3
// when there are five arguments; with four it is zero if ZeroMask is set and
// the call's original argument 0 otherwise.
1400  unsigned NumArgs = CI.arg_size();
1401  if (NumArgs >= 4) { // For masked intrinsics.
1402  Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1403  ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1404  CI.getArgOperand(0);
1405  Value *Mask = CI.getOperand(NumArgs - 1);
1406  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1407  }
1408  return Res;
1409 }
1410 
1412  Value *Ptr, Value *Data, Value *Mask,
1413  bool Aligned) {
// NOTE(review): the first signature line is absent from this listing; the
// body also uses `Builder`.
// Stores Data through Ptr under an integer Mask, as a plain store when the
// mask is a constant all-ones value and as a masked store otherwise.
1414  // Cast the pointer to the right type.
1415  Ptr = Builder.CreateBitCast(Ptr,
1416  llvm::PointerType::getUnqual(Data->getType()));
// Aligned requests the full byte size of Data's type as alignment; otherwise
// the access is treated as 1-byte aligned.
1417  const Align Alignment =
1418  Aligned
1419  ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
1420  : Align(1);
1421 
1422  // If the mask is all ones just emit a regular store.
1423  if (const auto *C = dyn_cast<Constant>(Mask))
1424  if (C->isAllOnesValue())
1425  return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1426 
1427  // Convert the mask from an integer type to a vector of i1.
1428  unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1429  Mask = getX86MaskVec(Builder, Mask, NumElts);
1430  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1431 }
1432 
1434  Value *Ptr, Value *Passthru, Value *Mask,
1435  bool Aligned) {
// NOTE(review): the first signature line is absent from this listing; the
// body also uses `Builder`.
// Loads a value of Passthru's type through Ptr under an integer Mask, as a
// plain load when the mask is a constant all-ones value and as a masked load
// (with Passthru filling the masked-off elements) otherwise.
1436  Type *ValTy = Passthru->getType();
1437  // Cast the pointer to the right type.
1438  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
// Aligned requests the full byte size of the loaded type as alignment;
// otherwise the access is treated as 1-byte aligned.
1439  const Align Alignment =
1440  Aligned
1441  ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
1442  8)
1443  : Align(1);
1444 
1445  // If the mask is all ones just emit a regular load.
1446  if (const auto *C = dyn_cast<Constant>(Mask))
1447  if (C->isAllOnesValue())
1448  return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1449 
1450  // Convert the mask from an integer type to a vector of i1.
1451  unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1452  Mask = getX86MaskVec(Builder, Mask, NumElts);
1453  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1454 }
1455 
// NOTE(review): the line that opens this function is absent from this listing;
// `Builder` and `CI` are used below as if they were its parameters — confirm.
1457  Type *Ty = CI.getType();
1458  Value *Op0 = CI.getArgOperand(0);
// NOTE(review): the line declaring F is missing from this listing; F is
// called with argument 0 and a constant i1 false — presumably an intrinsic
// declaration overloaded on Ty. Confirm against the upstream file.
1460  Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
// Three-argument form: argument 2 is the mask and argument 1 the pass-through
// value for masked-off elements.
1461  if (CI.arg_size() == 3)
1462  Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1463  return Res;
1464 }
1465 
1466 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1467  Type *Ty = CI.getType();
1468 
1469  // Arguments have a vXi32 type so cast to vXi64.
1470  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1471  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1472 
1473  if (IsSigned) {
1474  // Shift left then arithmetic shift right.
1475  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1476  LHS = Builder.CreateShl(LHS, ShiftAmt);
1477  LHS = Builder.CreateAShr(LHS, ShiftAmt);
1478  RHS = Builder.CreateShl(RHS, ShiftAmt);
1479  RHS = Builder.CreateAShr(RHS, ShiftAmt);
1480  } else {
1481  // Clear the upper bits.
1482  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1483  LHS = Builder.CreateAnd(LHS, Mask);
1484  RHS = Builder.CreateAnd(RHS, Mask);
1485  }
1486 
1487  Value *Res = Builder.CreateMul(LHS, RHS);
1488 
1489  if (CI.arg_size() == 4)
1490  Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1491 
1492  return Res;
1493 }
1494 
1495 // Applies a mask to a vector of i1 and makes sure the result is at least 8 bits wide.
1497  Value *Mask) {
// NOTE(review): the first signature line is absent from this listing; callers
// in this file pass (Builder, Vec, Mask).
1498  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
// A null Mask or a constant all-ones Mask leaves Vec untouched; otherwise Vec
// is ANDed with the mask converted to a vector of i1.
1499  if (Mask) {
1500  const auto *C = dyn_cast<Constant>(Mask);
1501  if (!C || !C->isAllOnesValue())
1502  Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1503  }
1504 
// Fewer than 8 elements: widen to 8 with a shuffle. The first NumElts indices
// keep Vec's own elements; the remaining ones (>= NumElts) address the
// shuffle's second operand.
1505  if (NumElts < 8) {
1506  int Indices[8];
1507  for (unsigned i = 0; i != NumElts; ++i)
1508  Indices[i] = i;
1509  for (unsigned i = NumElts; i != 8; ++i)
1510  Indices[i] = NumElts + i % NumElts;
1511  Vec = Builder.CreateShuffleVector(Vec,
// NOTE(review): the listing line carrying the second shuffle operand is
// missing here — presumably a zero vector of Vec's type; confirm upstream.
1513  Indices);
1514  }
// The i1 vector is reinterpreted as a single integer of max(NumElts, 8) bits.
1515  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1516 }
1517 
1519  unsigned CC, bool Signed) {
// NOTE(review): the first signature line is absent from this listing; the
// body also uses `Builder` and `CI`.
// Builds an i1-vector compare of arguments 0 and 1 according to condition
// code CC, then applies the call's last argument as a mask.
1520  Value *Op0 = CI.getArgOperand(0);
1521  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1522 
1523  Value *Cmp;
// CC 3 yields a constant all-false vector without comparing anything.
1524  if (CC == 3) {
1525  Cmp = Constant::getNullValue(
1526  FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1527  } else if (CC == 7) {
// NOTE(review): the listing line that starts this assignment is missing —
// presumably a constant all-true vector of the same type; confirm upstream.
1529  FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1530  } else {
// Remaining codes map to integer predicates; ordering predicates honour the
// Signed flag, EQ/NE do not.
1531  ICmpInst::Predicate Pred;
1532  switch (CC) {
1533  default: llvm_unreachable("Unknown condition code");
1534  case 0: Pred = ICmpInst::ICMP_EQ; break;
1535  case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1536  case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1537  case 4: Pred = ICmpInst::ICMP_NE; break;
1538  case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1539  case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1540  }
1541  Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1542  }
1543 
// The mask is whatever the call passes as its final argument.
1544  Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1545 
1546  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1547 }
1548 
1549 // Replace a masked intrinsic with an older unmasked intrinsic.
1551  Intrinsic::ID IID) {
// NOTE(review): the first signature line is absent from this listing; the
// body also uses `Builder` and `CI`.
// Calls intrinsic IID on arguments 0 and 1, then selects between that result
// and argument 2 (pass-through) under the mask in argument 3.
1552  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1553  Value *Rep = Builder.CreateCall(Intrin,
1554  { CI.getArgOperand(0), CI.getArgOperand(1) });
1555  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1556 }
1557 
// NOTE(review): the line that opens this function is absent from this listing;
// `Builder` and `CI` are used below as if they were its parameters — confirm.
// Produces A with element 0 replaced by element 0 of B when bit 0 of Mask is
// set, and by element 0 of Src otherwise.
1559  Value* A = CI.getArgOperand(0);
1560  Value* B = CI.getArgOperand(1);
1561  Value* Src = CI.getArgOperand(2);
1562  Value* Mask = CI.getArgOperand(3);
1563 
// Only the lowest mask bit is tested; the constant is an 8-bit value 1, so
// Mask is presumably an i8 — confirm against callers.
1564  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1565  Value* Cmp = Builder.CreateIsNotNull(AndNode);
1566  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1567  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1568  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1569  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1570 }
1571 
1572 
// Expands a mask operand into a full vector: operand 0 is converted by
// getX86MaskVec into a NumElts-wide mask vector (NumElts taken from the call's
// fixed-vector result type), which is then sign-extended to the result type.
// NOTE(review): getX86MaskVec is defined outside this excerpt; presumably it
// yields a <NumElts x i1> value -- confirm.
// NOTE(review): the signature line is absent from this listing (embedded
// numbering jumps from 1572 to 1574); the body uses `Builder` and `CI`.
1574  Value* Op = CI.getArgOperand(0);
1575  Type* ReturnOp = CI.getType();
1576  unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1577  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1578  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1579 }
1580 
1581 // Replace intrinsic with unmasked version and a select.
1583  CallInst &CI, Value *&Rep) {
// Picks the unmasked intrinsic (IID) that corresponds to the masked name in
// Name, based on the name prefix plus the bit width of the call's result type
// (VecWidth) and of its scalar element (EltWidth). On a match, a call to that
// intrinsic followed by an EmitX86Select is stored in Rep and true is
// returned. If no name prefix matches, false is returned before Rep is
// written. A matching prefix with an unexpected width hits llvm_unreachable.
// NOTE(review): the line carrying the function name and leading parameters is
// absent from this listing (embedded numbering jumps from 1581 to 1583).
1584  Name = Name.substr(12); // Remove avx512.mask.
1585 
1586  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1587  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1588  Intrinsic::ID IID;
1589  if (Name.startswith("max.p")) {
1590  if (VecWidth == 128 && EltWidth == 32)
1591  IID = Intrinsic::x86_sse_max_ps;
1592  else if (VecWidth == 128 && EltWidth == 64)
1593  IID = Intrinsic::x86_sse2_max_pd;
1594  else if (VecWidth == 256 && EltWidth == 32)
1595  IID = Intrinsic::x86_avx_max_ps_256;
1596  else if (VecWidth == 256 && EltWidth == 64)
1597  IID = Intrinsic::x86_avx_max_pd_256;
1598  else
1599  llvm_unreachable("Unexpected intrinsic");
1600  } else if (Name.startswith("min.p")) {
1601  if (VecWidth == 128 && EltWidth == 32)
1602  IID = Intrinsic::x86_sse_min_ps;
1603  else if (VecWidth == 128 && EltWidth == 64)
1604  IID = Intrinsic::x86_sse2_min_pd;
1605  else if (VecWidth == 256 && EltWidth == 32)
1606  IID = Intrinsic::x86_avx_min_ps_256;
1607  else if (VecWidth == 256 && EltWidth == 64)
1608  IID = Intrinsic::x86_avx_min_pd_256;
1609  else
1610  llvm_unreachable("Unexpected intrinsic");
1611  } else if (Name.startswith("pshuf.b.")) {
1612  if (VecWidth == 128)
1613  IID = Intrinsic::x86_ssse3_pshuf_b_128;
1614  else if (VecWidth == 256)
1615  IID = Intrinsic::x86_avx2_pshuf_b;
1616  else if (VecWidth == 512)
1617  IID = Intrinsic::x86_avx512_pshuf_b_512;
1618  else
1619  llvm_unreachable("Unexpected intrinsic");
1620  } else if (Name.startswith("pmul.hr.sw.")) {
1621  if (VecWidth == 128)
1622  IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1623  else if (VecWidth == 256)
1624  IID = Intrinsic::x86_avx2_pmul_hr_sw;
1625  else if (VecWidth == 512)
1626  IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1627  else
1628  llvm_unreachable("Unexpected intrinsic");
1629  } else if (Name.startswith("pmulh.w.")) {
1630  if (VecWidth == 128)
1631  IID = Intrinsic::x86_sse2_pmulh_w;
1632  else if (VecWidth == 256)
1633  IID = Intrinsic::x86_avx2_pmulh_w;
1634  else if (VecWidth == 512)
1635  IID = Intrinsic::x86_avx512_pmulh_w_512;
1636  else
1637  llvm_unreachable("Unexpected intrinsic");
1638  } else if (Name.startswith("pmulhu.w.")) {
1639  if (VecWidth == 128)
1640  IID = Intrinsic::x86_sse2_pmulhu_w;
1641  else if (VecWidth == 256)
1642  IID = Intrinsic::x86_avx2_pmulhu_w;
1643  else if (VecWidth == 512)
1644  IID = Intrinsic::x86_avx512_pmulhu_w_512;
1645  else
1646  llvm_unreachable("Unexpected intrinsic");
1647  } else if (Name.startswith("pmaddw.d.")) {
1648  if (VecWidth == 128)
1649  IID = Intrinsic::x86_sse2_pmadd_wd;
1650  else if (VecWidth == 256)
1651  IID = Intrinsic::x86_avx2_pmadd_wd;
1652  else if (VecWidth == 512)
1653  IID = Intrinsic::x86_avx512_pmaddw_d_512;
1654  else
1655  llvm_unreachable("Unexpected intrinsic");
1656  } else if (Name.startswith("pmaddubs.w.")) {
1657  if (VecWidth == 128)
1658  IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1659  else if (VecWidth == 256)
1660  IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1661  else if (VecWidth == 512)
1662  IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1663  else
1664  llvm_unreachable("Unexpected intrinsic");
1665  } else if (Name.startswith("packsswb.")) {
1666  if (VecWidth == 128)
1667  IID = Intrinsic::x86_sse2_packsswb_128;
1668  else if (VecWidth == 256)
1669  IID = Intrinsic::x86_avx2_packsswb;
1670  else if (VecWidth == 512)
1671  IID = Intrinsic::x86_avx512_packsswb_512;
1672  else
1673  llvm_unreachable("Unexpected intrinsic");
1674  } else if (Name.startswith("packssdw.")) {
1675  if (VecWidth == 128)
1676  IID = Intrinsic::x86_sse2_packssdw_128;
1677  else if (VecWidth == 256)
1678  IID = Intrinsic::x86_avx2_packssdw;
1679  else if (VecWidth == 512)
1680  IID = Intrinsic::x86_avx512_packssdw_512;
1681  else
1682  llvm_unreachable("Unexpected intrinsic");
1683  } else if (Name.startswith("packuswb.")) {
1684  if (VecWidth == 128)
1685  IID = Intrinsic::x86_sse2_packuswb_128;
1686  else if (VecWidth == 256)
1687  IID = Intrinsic::x86_avx2_packuswb;
1688  else if (VecWidth == 512)
1689  IID = Intrinsic::x86_avx512_packuswb_512;
1690  else
1691  llvm_unreachable("Unexpected intrinsic");
1692  } else if (Name.startswith("packusdw.")) {
1693  if (VecWidth == 128)
1694  IID = Intrinsic::x86_sse41_packusdw;
1695  else if (VecWidth == 256)
1696  IID = Intrinsic::x86_avx2_packusdw;
1697  else if (VecWidth == 512)
1698  IID = Intrinsic::x86_avx512_packusdw_512;
1699  else
1700  llvm_unreachable("Unexpected intrinsic");
1701  } else if (Name.startswith("vpermilvar.")) {
1702  if (VecWidth == 128 && EltWidth == 32)
1703  IID = Intrinsic::x86_avx_vpermilvar_ps;
1704  else if (VecWidth == 128 && EltWidth == 64)
1705  IID = Intrinsic::x86_avx_vpermilvar_pd;
1706  else if (VecWidth == 256 && EltWidth == 32)
1707  IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1708  else if (VecWidth == 256 && EltWidth == 64)
1709  IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1710  else if (VecWidth == 512 && EltWidth == 32)
1711  IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1712  else if (VecWidth == 512 && EltWidth == 64)
1713  IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1714  else
1715  llvm_unreachable("Unexpected intrinsic");
// The conversion names below are matched exactly, not by prefix.
1716  } else if (Name == "cvtpd2dq.256") {
1717  IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1718  } else if (Name == "cvtpd2ps.256") {
1719  IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1720  } else if (Name == "cvttpd2dq.256") {
1721  IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1722  } else if (Name == "cvttps2dq.128") {
1723  IID = Intrinsic::x86_sse2_cvttps2dq;
1724  } else if (Name == "cvttps2dq.256") {
1725  IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1726  } else if (Name.startswith("permvar.")) {
// For 32- and 64-bit elements the choice also depends on whether the result
// is a floating-point vector; the 16- and 8-bit cases ignore IsFloat.
1727  bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1728  if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1729  IID = Intrinsic::x86_avx2_permps;
1730  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1731  IID = Intrinsic::x86_avx2_permd;
1732  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1733  IID = Intrinsic::x86_avx512_permvar_df_256;
1734  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1735  IID = Intrinsic::x86_avx512_permvar_di_256;
1736  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1737  IID = Intrinsic::x86_avx512_permvar_sf_512;
1738  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1739  IID = Intrinsic::x86_avx512_permvar_si_512;
1740  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1741  IID = Intrinsic::x86_avx512_permvar_df_512;
1742  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1743  IID = Intrinsic::x86_avx512_permvar_di_512;
1744  else if (VecWidth == 128 && EltWidth == 16)
1745  IID = Intrinsic::x86_avx512_permvar_hi_128;
1746  else if (VecWidth == 256 && EltWidth == 16)
1747  IID = Intrinsic::x86_avx512_permvar_hi_256;
1748  else if (VecWidth == 512 && EltWidth == 16)
1749  IID = Intrinsic::x86_avx512_permvar_hi_512;
1750  else if (VecWidth == 128 && EltWidth == 8)
1751  IID = Intrinsic::x86_avx512_permvar_qi_128;
1752  else if (VecWidth == 256 && EltWidth == 8)
1753  IID = Intrinsic::x86_avx512_permvar_qi_256;
1754  else if (VecWidth == 512 && EltWidth == 8)
1755  IID = Intrinsic::x86_avx512_permvar_qi_512;
1756  else
1757  llvm_unreachable("Unexpected intrinsic");
1758  } else if (Name.startswith("dbpsadbw.")) {
1759  if (VecWidth == 128)
1760  IID = Intrinsic::x86_avx512_dbpsadbw_128;
1761  else if (VecWidth == 256)
1762  IID = Intrinsic::x86_avx512_dbpsadbw_256;
1763  else if (VecWidth == 512)
1764  IID = Intrinsic::x86_avx512_dbpsadbw_512;
1765  else
1766  llvm_unreachable("Unexpected intrinsic");
1767  } else if (Name.startswith("pmultishift.qb.")) {
1768  if (VecWidth == 128)
1769  IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1770  else if (VecWidth == 256)
1771  IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1772  else if (VecWidth == 512)
1773  IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1774  else
1775  llvm_unreachable("Unexpected intrinsic");
1776  } else if (Name.startswith("conflict.")) {
// Name[9] is the character right after "conflict." ('d' or 'q').
1777  if (Name[9] == 'd' && VecWidth == 128)
1778  IID = Intrinsic::x86_avx512_conflict_d_128;
1779  else if (Name[9] == 'd' && VecWidth == 256)
1780  IID = Intrinsic::x86_avx512_conflict_d_256;
1781  else if (Name[9] == 'd' && VecWidth == 512)
1782  IID = Intrinsic::x86_avx512_conflict_d_512;
1783  else if (Name[9] == 'q' && VecWidth == 128)
1784  IID = Intrinsic::x86_avx512_conflict_q_128;
1785  else if (Name[9] == 'q' && VecWidth == 256)
1786  IID = Intrinsic::x86_avx512_conflict_q_256;
1787  else if (Name[9] == 'q' && VecWidth == 512)
1788  IID = Intrinsic::x86_avx512_conflict_q_512;
1789  else
1790  llvm_unreachable("Unexpected intrinsic");
1791  } else if (Name.startswith("pavg.")) {
// Name[5] is the character right after "pavg." ('b' or 'w').
1792  if (Name[5] == 'b' && VecWidth == 128)
1793  IID = Intrinsic::x86_sse2_pavg_b;
1794  else if (Name[5] == 'b' && VecWidth == 256)
1795  IID = Intrinsic::x86_avx2_pavg_b;
1796  else if (Name[5] == 'b' && VecWidth == 512)
1797  IID = Intrinsic::x86_avx512_pavg_b_512;
1798  else if (Name[5] == 'w' && VecWidth == 128)
1799  IID = Intrinsic::x86_sse2_pavg_w;
1800  else if (Name[5] == 'w' && VecWidth == 256)
1801  IID = Intrinsic::x86_avx2_pavg_w;
1802  else if (Name[5] == 'w' && VecWidth == 512)
1803  IID = Intrinsic::x86_avx512_pavg_w_512;
1804  else
1805  llvm_unreachable("Unexpected intrinsic");
1806  } else
1807  return false;
1808 
// NOTE(review): the declaration of Args (listing line 1809) is missing from
// this excerpt; presumably it is initialized from CI's call operands --
// confirm. Its last two entries are dropped before the unmasked call; the
// call's last two operands are instead fed to EmitX86Select below.
1810  Args.pop_back();
1811  Args.pop_back();
1812  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1813  Args);
1814  unsigned NumArgs = CI.arg_size();
1815  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1816  CI.getArgOperand(NumArgs - 2));
1817  return true;
1818 }
1819 
/// Upgrade the comment in a call to inline asm that represents an objc
/// retain/release marker.
///
/// If \p AsmStr begins with "mov\tfp", mentions
/// "objc_retainAutoreleaseReturnValue" and contains "# marker", the '#' of the
/// first "# marker" is replaced in place with ';'. Any other string is left
/// untouched.
///
/// \param AsmStr the inline asm string to rewrite in place; must not be null.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // rfind anchored at position 0 can only match at the very start of the
  // string, so a non-matching string is rejected after inspecting the prefix
  // instead of being scanned end to end (which `find(...) == 0` would do).
  if (AsmStr->rfind("mov\tfp", 0) != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t Pos = AsmStr->find("# marker");
  if (Pos != std::string::npos)
    AsmStr->replace(Pos, 1, ";");
}
1830 
1832  IRBuilder<> &Builder) {
// Builds and returns the replacement call for an old ARM MVE/CDE intrinsic
// call CI to function F, dispatching on Name. Two families are handled:
//  - "mve.vctp64.old": re-emitted as arm_mve_vctp64 followed by two
//    predicate-cast calls (arm_mve_pred_v2i, then arm_mve_pred_i2v).
//  - the listed "...v4i1" predicated names: re-emitted as the same intrinsic
//    ID with a 2 x i1 predicate type in place of the old one.
// Any other Name reaches the llvm_unreachable at the end.
// NOTE(review): the line carrying the function name and leading parameters is
// absent from this listing (embedded numbering jumps from 1830 to 1832), as
// are listing lines 1840, 1845 and 1903, which presumably hold the
// Intrinsic::getDeclaration( heads of the calls whose arguments follow.
1833  if (Name == "mve.vctp64.old") {
1834  // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
1835  // correct type.
1836  Value *VCTP = Builder.CreateCall(
1837  Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
1838  CI->getArgOperand(0), CI->getName());
1839  Value *C1 = Builder.CreateCall(
1841  F->getParent(), Intrinsic::arm_mve_pred_v2i,
1842  {VectorType::get(Builder.getInt1Ty(), 2, false)}),
1843  VCTP);
1844  return Builder.CreateCall(
1846  F->getParent(), Intrinsic::arm_mve_pred_i2v,
1847  {VectorType::get(Builder.getInt1Ty(), 4, false)}),
1848  C1);
1849  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
1850  Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
1851  Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
1852  Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
1853  Name == "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
1854  Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
1855  Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
1856  Name == "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
1857  Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
1858  Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
1859  Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
1860  Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
1861  Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
1862  Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
// Tys collects the overload types used to look up the new declaration; each
// intrinsic group takes them from different operands, always ending in the
// 2 x i1 predicate type.
1863  std::vector<Type *> Tys;
1864  unsigned ID = CI->getIntrinsicID();
1865  Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
1866  switch (ID) {
1867  case Intrinsic::arm_mve_mull_int_predicated:
1868  case Intrinsic::arm_mve_vqdmull_predicated:
1869  case Intrinsic::arm_mve_vldr_gather_base_predicated:
1870  Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
1871  break;
1872  case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
1873  case Intrinsic::arm_mve_vstr_scatter_base_predicated:
1874  case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
1875  Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
1876  V2I1Ty};
1877  break;
1878  case Intrinsic::arm_mve_vldr_gather_offset_predicated:
1879  Tys = {CI->getType(), CI->getOperand(0)->getType(),
1880  CI->getOperand(1)->getType(), V2I1Ty};
1881  break;
1882  case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
1883  Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
1884  CI->getOperand(2)->getType(), V2I1Ty};
1885  break;
1886  case Intrinsic::arm_cde_vcx1q_predicated:
1887  case Intrinsic::arm_cde_vcx1qa_predicated:
1888  case Intrinsic::arm_cde_vcx2q_predicated:
1889  case Intrinsic::arm_cde_vcx2qa_predicated:
1890  case Intrinsic::arm_cde_vcx3q_predicated:
1891  case Intrinsic::arm_cde_vcx3qa_predicated:
1892  Tys = {CI->getOperand(1)->getType(), V2I1Ty};
1893  break;
1894  default:
1895  llvm_unreachable("Unhandled Intrinsic!");
1896  }
1897 
// Copy the call arguments, routing every argument whose scalar type is one
// bit wide through arm_mve_pred_v2i (overloaded on 4 x i1) and then
// arm_mve_pred_i2v (overloaded on V2I1Ty); other arguments pass unchanged.
1898  std::vector<Value *> Ops;
1899  for (Value *Op : CI->args()) {
1900  Type *Ty = Op->getType();
1901  if (Ty->getScalarSizeInBits() == 1) {
1902  Value *C1 = Builder.CreateCall(
1904  F->getParent(), Intrinsic::arm_mve_pred_v2i,
1905  {VectorType::get(Builder.getInt1Ty(), 4, false)}),
1906  Op);
1907  Op = Builder.CreateCall(
1908  Intrinsic::getDeclaration(F->getParent(),
1909  Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
1910  C1);
1911  }
1912  Ops.push_back(Op);
1913  }
1914 
// The replacement call keeps the old call's name and intrinsic ID.
1915  Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
1916  return Builder.CreateCall(Fn, Ops, CI->getName());
1917  }
1918  llvm_unreachable("Unknown function for ARM CallInst upgrade.");
1919 }
1920 
1921 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1922 /// provided to seamlessly integrate with existing context.
1924  Function *F = CI->getCalledFunction();
1925  LLVMContext &C = CI->getContext();
1927  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1928 
1929  assert(F && "Intrinsic call is not direct?");
1930 
1931  if (!NewFn) {
1932  // Get the Function's name.
1933  StringRef Name = F->getName();
1934 
1935  assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1936  Name = Name.substr(5);
1937 
1938  bool IsX86 = Name.startswith("x86.");
1939  if (IsX86)
1940  Name = Name.substr(4);
1941  bool IsNVVM = Name.startswith("nvvm.");
1942  if (IsNVVM)
1943  Name = Name.substr(5);
1944  bool IsARM = Name.startswith("arm.");
1945  if (IsARM)
1946  Name = Name.substr(4);
1947 
1948  if (IsX86 && Name.startswith("sse4a.movnt.")) {
1949  Module *M = F->getParent();
1951  Elts.push_back(
1953  MDNode *Node = MDNode::get(C, Elts);
1954 
1955  Value *Arg0 = CI->getArgOperand(0);
1956  Value *Arg1 = CI->getArgOperand(1);
1957 
1958  // Nontemporal (unaligned) store of the 0'th element of the float/double
1959  // vector.
1960  Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1961  PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1962  Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1963  Value *Extract =
1964  Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1965 
1966  StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
1967  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1968 
1969  // Remove intrinsic.
1970  CI->eraseFromParent();
1971  return;
1972  }
1973 
1974  if (IsX86 && (Name.startswith("avx.movnt.") ||
1975  Name.startswith("avx512.storent."))) {
1976  Module *M = F->getParent();
1978  Elts.push_back(
1980  MDNode *Node = MDNode::get(C, Elts);
1981 
1982  Value *Arg0 = CI->getArgOperand(0);
1983  Value *Arg1 = CI->getArgOperand(1);
1984 
1985  // Convert the type of the pointer to a pointer to the stored type.
1986  Value *BC = Builder.CreateBitCast(Arg0,
1988  "cast");
1989  StoreInst *SI = Builder.CreateAlignedStore(
1990  Arg1, BC,
1991  Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
1992  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1993 
1994  // Remove intrinsic.
1995  CI->eraseFromParent();
1996  return;
1997  }
1998 
1999  if (IsX86 && Name == "sse2.storel.dq") {
2000  Value *Arg0 = CI->getArgOperand(0);
2001  Value *Arg1 = CI->getArgOperand(1);
2002 
2003  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2004  Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2005  Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2006  Value *BC = Builder.CreateBitCast(Arg0,
2008  "cast");
2009  Builder.CreateAlignedStore(Elt, BC, Align(1));
2010 
2011  // Remove intrinsic.
2012  CI->eraseFromParent();
2013  return;
2014  }
2015 
2016  if (IsX86 && (Name.startswith("sse.storeu.") ||
2017  Name.startswith("sse2.storeu.") ||
2018  Name.startswith("avx.storeu."))) {
2019  Value *Arg0 = CI->getArgOperand(0);
2020  Value *Arg1 = CI->getArgOperand(1);
2021 
2022  Arg0 = Builder.CreateBitCast(Arg0,
2024  "cast");
2025  Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2026 
2027  // Remove intrinsic.
2028  CI->eraseFromParent();
2029  return;
2030  }
2031 
2032  if (IsX86 && Name == "avx512.mask.store.ss") {
2033  Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2035  Mask, false);
2036 
2037  // Remove intrinsic.
2038  CI->eraseFromParent();
2039  return;
2040  }
2041 
2042  if (IsX86 && (Name.startswith("avx512.mask.store"))) {
2043  // "avx512.mask.storeu." or "avx512.mask.store."
2044  bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2046  CI->getArgOperand(2), Aligned);
2047 
2048  // Remove intrinsic.
2049  CI->eraseFromParent();
2050  return;
2051  }
2052 
2053  Value *Rep;
2054  // Upgrade packed integer vector compare intrinsics to compare instructions.
2055  if (IsX86 && (Name.startswith("sse2.pcmp") ||
2056  Name.startswith("avx2.pcmp"))) {
2057  // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2058  bool CmpEq = Name[9] == 'e';
2059  Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2060  CI->getArgOperand(0), CI->getArgOperand(1));
2061  Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2062  } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
2063  Type *ExtTy = Type::getInt32Ty(C);
2064  if (CI->getOperand(0)->getType()->isIntegerTy(8))
2065  ExtTy = Type::getInt64Ty(C);
2066  unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2067  ExtTy->getPrimitiveSizeInBits();
2068  Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2069  Rep = Builder.CreateVectorSplat(NumElts, Rep);
2070  } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2071  Name == "sse2.sqrt.sd")) {
2072  Value *Vec = CI->getArgOperand(0);
2073  Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2074  Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2075  Intrinsic::sqrt, Elt0->getType());
2076  Elt0 = Builder.CreateCall(Intr, Elt0);
2077  Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2078  } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
2079  Name.startswith("sse2.sqrt.p") ||
2080  Name.startswith("sse.sqrt.p"))) {
2081  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2082  Intrinsic::sqrt,
2083  CI->getType()),
2084  {CI->getArgOperand(0)});
2085  } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
2086  if (CI->arg_size() == 4 &&
2087  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2088  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2089  Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2090  : Intrinsic::x86_avx512_sqrt_pd_512;
2091 
2092  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
2093  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2094  IID), Args);
2095  } else {
2096  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2097  Intrinsic::sqrt,
2098  CI->getType()),
2099  {CI->getArgOperand(0)});
2100  }
2101  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2102  CI->getArgOperand(1));
2103  } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
2104  Name.startswith("avx512.ptestnm"))) {
2105  Value *Op0 = CI->getArgOperand(0);
2106  Value *Op1 = CI->getArgOperand(1);
2107  Value *Mask = CI->getArgOperand(2);
2108  Rep = Builder.CreateAnd(Op0, Op1);
2109  llvm::Type *Ty = Op0->getType();
2110  Value *Zero = llvm::Constant::getNullValue(Ty);
2111  ICmpInst::Predicate Pred =
2112  Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2113  Rep = Builder.CreateICmp(Pred, Rep, Zero);
2114  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
2115  } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
2116  unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2117  ->getNumElements();
2118  Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2119  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2120  CI->getArgOperand(1));
2121  } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
2122  unsigned NumElts = CI->getType()->getScalarSizeInBits();
2123  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2124  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2125  int Indices[64];
2126  for (unsigned i = 0; i != NumElts; ++i)
2127  Indices[i] = i;
2128 
2129  // First extract half of each vector. This gives better codegen than
2130  // doing it in a single shuffle.
2131  LHS = Builder.CreateShuffleVector(LHS, LHS,
2132  makeArrayRef(Indices, NumElts / 2));
2133  RHS = Builder.CreateShuffleVector(RHS, RHS,
2134  makeArrayRef(Indices, NumElts / 2));
2135  // Concat the vectors.
2136  // NOTE: Operands have to be swapped to match intrinsic definition.
2137  Rep = Builder.CreateShuffleVector(RHS, LHS,
2138  makeArrayRef(Indices, NumElts));
2139  Rep = Builder.CreateBitCast(Rep, CI->getType());
2140  } else if (IsX86 && Name == "avx512.kand.w") {
2141  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2142  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2143  Rep = Builder.CreateAnd(LHS, RHS);
2144  Rep = Builder.CreateBitCast(Rep, CI->getType());
2145  } else if (IsX86 && Name == "avx512.kandn.w") {
2146  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2147  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2148  LHS = Builder.CreateNot(LHS);
2149  Rep = Builder.CreateAnd(LHS, RHS);
2150  Rep = Builder.CreateBitCast(Rep, CI->getType());
2151  } else if (IsX86 && Name == "avx512.kor.w") {
2152  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2153  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2154  Rep = Builder.CreateOr(LHS, RHS);
2155  Rep = Builder.CreateBitCast(Rep, CI->getType());
2156  } else if (IsX86 && Name == "avx512.kxor.w") {
2157  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2158  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2159  Rep = Builder.CreateXor(LHS, RHS);
2160  Rep = Builder.CreateBitCast(Rep, CI->getType());
2161  } else if (IsX86 && Name == "avx512.kxnor.w") {
2162  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2163  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2164  LHS = Builder.CreateNot(LHS);
2165  Rep = Builder.CreateXor(LHS, RHS);
2166  Rep = Builder.CreateBitCast(Rep, CI->getType());
2167  } else if (IsX86 && Name == "avx512.knot.w") {
2168  Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2169  Rep = Builder.CreateNot(Rep);
2170  Rep = Builder.CreateBitCast(Rep, CI->getType());
2171  } else if (IsX86 &&
2172  (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2173  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2174  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2175  Rep = Builder.CreateOr(LHS, RHS);
2176  Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2177  Value *C;
2178  if (Name[14] == 'c')
2179  C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2180  else
2181  C = ConstantInt::getNullValue(Builder.getInt16Ty());
2182  Rep = Builder.CreateICmpEQ(Rep, C);
2183  Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2184  } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2185  Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2186  Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2187  Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2188  Type *I32Ty = Type::getInt32Ty(C);
2189  Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2190  ConstantInt::get(I32Ty, 0));
2191  Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2192  ConstantInt::get(I32Ty, 0));
2193  Value *EltOp;
2194  if (Name.contains(".add."))
2195  EltOp = Builder.CreateFAdd(Elt0, Elt1);
2196  else if (Name.contains(".sub."))
2197  EltOp = Builder.CreateFSub(Elt0, Elt1);
2198  else if (Name.contains(".mul."))
2199  EltOp = Builder.CreateFMul(Elt0, Elt1);
2200  else
2201  EltOp = Builder.CreateFDiv(Elt0, Elt1);
2202  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2203  ConstantInt::get(I32Ty, 0));
2204  } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
2205  // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2206  bool CmpEq = Name[16] == 'e';
2207  Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2208  } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
2209  Type *OpTy = CI->getArgOperand(0)->getType();
2210  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2211  Intrinsic::ID IID;
2212  switch (VecWidth) {
2213  default: llvm_unreachable("Unexpected intrinsic");
2214  case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2215  case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2216  case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2217  }
2218 
2219  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2220  { CI->getOperand(0), CI->getArgOperand(1) });
2221  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2222  } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
2223  Type *OpTy = CI->getArgOperand(0)->getType();
2224  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2225  unsigned EltWidth = OpTy->getScalarSizeInBits();
2226  Intrinsic::ID IID;
2227  if (VecWidth == 128 && EltWidth == 32)
2228  IID = Intrinsic::x86_avx512_fpclass_ps_128;
2229  else if (VecWidth == 256 && EltWidth == 32)
2230  IID = Intrinsic::x86_avx512_fpclass_ps_256;
2231  else if (VecWidth == 512 && EltWidth == 32)
2232  IID = Intrinsic::x86_avx512_fpclass_ps_512;
2233  else if (VecWidth == 128 && EltWidth == 64)
2234  IID = Intrinsic::x86_avx512_fpclass_pd_128;
2235  else if (VecWidth == 256 && EltWidth == 64)
2236  IID = Intrinsic::x86_avx512_fpclass_pd_256;
2237  else if (VecWidth == 512 && EltWidth == 64)
2238  IID = Intrinsic::x86_avx512_fpclass_pd_512;
2239  else
2240  llvm_unreachable("Unexpected intrinsic");
2241 
2242  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2243  { CI->getOperand(0), CI->getArgOperand(1) });
2244  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2245  } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
2247  Type *OpTy = Args[0]->getType();
2248  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2249  unsigned EltWidth = OpTy->getScalarSizeInBits();
2250  Intrinsic::ID IID;
2251  if (VecWidth == 128 && EltWidth == 32)
2252  IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2253  else if (VecWidth == 256 && EltWidth == 32)
2254  IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2255  else if (VecWidth == 512 && EltWidth == 32)
2256  IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2257  else if (VecWidth == 128 && EltWidth == 64)
2258  IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2259  else if (VecWidth == 256 && EltWidth == 64)
2260  IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2261  else if (VecWidth == 512 && EltWidth == 64)
2262  IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2263  else
2264  llvm_unreachable("Unexpected intrinsic");
2265 
2267  if (VecWidth == 512)
2268  std::swap(Mask, Args.back());
2269  Args.push_back(Mask);
2270 
2271  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2272  Args);
2273  } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
2274  // Integer compare intrinsics.
2275  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2276  Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2277  } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2278  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2279  Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2280  } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2281  Name.startswith("avx512.cvtw2mask.") ||
2282  Name.startswith("avx512.cvtd2mask.") ||
2283  Name.startswith("avx512.cvtq2mask."))) {
2284  Value *Op = CI->getArgOperand(0);
2285  Value *Zero = llvm::Constant::getNullValue(Op->getType());
2286  Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2287  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2288  } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2289  Name == "ssse3.pabs.w.128" ||
2290  Name == "ssse3.pabs.d.128" ||
2291  Name.startswith("avx2.pabs") ||
2292  Name.startswith("avx512.mask.pabs"))) {
2293  Rep = upgradeAbs(Builder, *CI);
2294  } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2295  Name == "sse2.pmaxs.w" ||
2296  Name == "sse41.pmaxsd" ||
2297  Name.startswith("avx2.pmaxs") ||
2298  Name.startswith("avx512.mask.pmaxs"))) {
2300  } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2301  Name == "sse41.pmaxuw" ||
2302  Name == "sse41.pmaxud" ||
2303  Name.startswith("avx2.pmaxu") ||
2304  Name.startswith("avx512.mask.pmaxu"))) {
2306  } else if (IsX86 && (Name == "sse41.pminsb" ||
2307  Name == "sse2.pmins.w" ||
2308  Name == "sse41.pminsd" ||
2309  Name.startswith("avx2.pmins") ||
2310  Name.startswith("avx512.mask.pmins"))) {
2312  } else if (IsX86 && (Name == "sse2.pminu.b" ||
2313  Name == "sse41.pminuw" ||
2314  Name == "sse41.pminud" ||
2315  Name.startswith("avx2.pminu") ||
2316  Name.startswith("avx512.mask.pminu"))) {
2318  } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2319  Name == "avx2.pmulu.dq" ||
2320  Name == "avx512.pmulu.dq.512" ||
2321  Name.startswith("avx512.mask.pmulu.dq."))) {
2322  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2323  } else if (IsX86 && (Name == "sse41.pmuldq" ||
2324  Name == "avx2.pmul.dq" ||
2325  Name == "avx512.pmul.dq.512" ||
2326  Name.startswith("avx512.mask.pmul.dq."))) {
2327  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2328  } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2329  Name == "sse2.cvtsi2sd" ||
2330  Name == "sse.cvtsi642ss" ||
2331  Name == "sse2.cvtsi642sd")) {
2332  Rep = Builder.CreateSIToFP(
2333  CI->getArgOperand(1),
2334  cast<VectorType>(CI->getType())->getElementType());
2335  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2336  } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2337  Rep = Builder.CreateUIToFP(
2338  CI->getArgOperand(1),
2339  cast<VectorType>(CI->getType())->getElementType());
2340  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2341  } else if (IsX86 && Name == "sse2.cvtss2sd") {
2342  Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2343  Rep = Builder.CreateFPExt(
2344  Rep, cast<VectorType>(CI->getType())->getElementType());
2345  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2346  } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2347  Name == "sse2.cvtdq2ps" ||
2348  Name == "avx.cvtdq2.pd.256" ||
2349  Name == "avx.cvtdq2.ps.256" ||
2350  Name.startswith("avx512.mask.cvtdq2pd.") ||
2351  Name.startswith("avx512.mask.cvtudq2pd.") ||
2352  Name.startswith("avx512.mask.cvtdq2ps.") ||
2353  Name.startswith("avx512.mask.cvtudq2ps.") ||
2354  Name.startswith("avx512.mask.cvtqq2pd.") ||
2355  Name.startswith("avx512.mask.cvtuqq2pd.") ||
2356  Name == "avx512.mask.cvtqq2ps.256" ||
2357  Name == "avx512.mask.cvtqq2ps.512" ||
2358  Name == "avx512.mask.cvtuqq2ps.256" ||
2359  Name == "avx512.mask.cvtuqq2ps.512" ||
2360  Name == "sse2.cvtps2pd" ||
2361  Name == "avx.cvt.ps2.pd.256" ||
2362  Name == "avx512.mask.cvtps2pd.128" ||
2363  Name == "avx512.mask.cvtps2pd.256")) {
2364  auto *DstTy = cast<FixedVectorType>(CI->getType());
2365  Rep = CI->getArgOperand(0);
2366  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2367 
2368  unsigned NumDstElts = DstTy->getNumElements();
2369  if (NumDstElts < SrcTy->getNumElements()) {
2370  assert(NumDstElts == 2 && "Unexpected vector size");
2371  Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2372  }
2373 
2374  bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2375  bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2376  if (IsPS2PD)
2377  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2378  else if (CI->arg_size() == 4 &&
2379  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2380  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2381  Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2382  : Intrinsic::x86_avx512_sitofp_round;
2384  { DstTy, SrcTy });
2385  Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2386  } else {
2387  Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2388  : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2389  }
2390 
2391  if (CI->arg_size() >= 3)
2392  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2393  CI->getArgOperand(1));
2394  } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2395  Name.startswith("vcvtph2ps."))) {
2396  auto *DstTy = cast<FixedVectorType>(CI->getType());
2397  Rep = CI->getArgOperand(0);
2398  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2399  unsigned NumDstElts = DstTy->getNumElements();
2400  if (NumDstElts != SrcTy->getNumElements()) {
2401  assert(NumDstElts == 4 && "Unexpected vector size");
2402  Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2403  }
2404  Rep = Builder.CreateBitCast(
2405  Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2406  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2407  if (CI->arg_size() >= 3)
2408  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2409  CI->getArgOperand(1));
2410  } else if (IsX86 && Name.startswith("avx512.mask.load")) {
2411  // "avx512.mask.loadu." or "avx512.mask.load."
2412  bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2413  Rep =
2415  CI->getArgOperand(2), Aligned);
2416  } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2417  auto *ResultTy = cast<FixedVectorType>(CI->getType());
2418  Type *PtrTy = ResultTy->getElementType();
2419 
2420  // Cast the pointer to element type.
2421  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2423 
2424  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2425  ResultTy->getNumElements());
2426 
2427  Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2428  Intrinsic::masked_expandload,
2429  ResultTy);
2430  Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2431  } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2432  auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2433  Type *PtrTy = ResultTy->getElementType();
2434 
2435  // Cast the pointer to element type.
2436  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2438 
2439  Value *MaskVec =
2441  cast<FixedVectorType>(ResultTy)->getNumElements());
2442 
2443  Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2444  Intrinsic::masked_compressstore,
2445  ResultTy);
2446  Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2447  } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2448  Name.startswith("avx512.mask.expand."))) {
2449  auto *ResultTy = cast<FixedVectorType>(CI->getType());
2450 
2451  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2452  ResultTy->getNumElements());
2453 
2454  bool IsCompress = Name[12] == 'c';
2455  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2456  : Intrinsic::x86_avx512_mask_expand;
2457  Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2458  Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2459  MaskVec });
2460  } else if (IsX86 && Name.startswith("xop.vpcom")) {
2461  bool IsSigned;
2462  if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2463  Name.endswith("uq"))
2464  IsSigned = false;
2465  else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2466  Name.endswith("q"))
2467  IsSigned = true;
2468  else
2469  llvm_unreachable("Unknown suffix");
2470 
2471  unsigned Imm;
2472  if (CI->arg_size() == 3) {
2473  Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2474  } else {
2475  Name = Name.substr(9); // strip off "xop.vpcom"
2476  if (Name.startswith("lt"))
2477  Imm = 0;
2478  else if (Name.startswith("le"))
2479  Imm = 1;
2480  else if (Name.startswith("gt"))
2481  Imm = 2;
2482  else if (Name.startswith("ge"))
2483  Imm = 3;
2484  else if (Name.startswith("eq"))
2485  Imm = 4;
2486  else if (Name.startswith("ne"))
2487  Imm = 5;
2488  else if (Name.startswith("false"))
2489  Imm = 6;
2490  else if (Name.startswith("true"))
2491  Imm = 7;
2492  else
2493  llvm_unreachable("Unknown condition");
2494  }
2495 
2496  Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2497  } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2498  Value *Sel = CI->getArgOperand(2);
2499  Value *NotSel = Builder.CreateNot(Sel);
2500  Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2501  Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2502  Rep = Builder.CreateOr(Sel0, Sel1);
2503  } else if (IsX86 && (Name.startswith("xop.vprot") ||
2504  Name.startswith("avx512.prol") ||
2505  Name.startswith("avx512.mask.prol"))) {
2506  Rep = upgradeX86Rotate(Builder, *CI, false);
2507  } else if (IsX86 && (Name.startswith("avx512.pror") ||
2508  Name.startswith("avx512.mask.pror"))) {
2509  Rep = upgradeX86Rotate(Builder, *CI, true);
2510  } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2511  Name.startswith("avx512.mask.vpshld") ||
2512  Name.startswith("avx512.maskz.vpshld"))) {
2513  bool ZeroMask = Name[11] == 'z';
2514  Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2515  } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2516  Name.startswith("avx512.mask.vpshrd") ||
2517  Name.startswith("avx512.maskz.vpshrd"))) {
2518  bool ZeroMask = Name[11] == 'z';
2519  Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2520  } else if (IsX86 && Name == "sse42.crc32.64.8") {
2521  Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2522  Intrinsic::x86_sse42_crc32_32_8);
2523  Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2524  Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2525  Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2526  } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2527  Name.startswith("avx512.vbroadcast.s"))) {
2528  // Replace broadcasts with a series of insertelements.
2529  auto *VecTy = cast<FixedVectorType>(CI->getType());
2530  Type *EltTy = VecTy->getElementType();
2531  unsigned EltNum = VecTy->getNumElements();
2532  Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2533  EltTy->getPointerTo());
2534  Value *Load = Builder.CreateLoad(EltTy, Cast);
2535  Type *I32Ty = Type::getInt32Ty(C);
2536  Rep = PoisonValue::get(VecTy);
2537  for (unsigned I = 0; I < EltNum; ++I)
2538  Rep = Builder.CreateInsertElement(Rep, Load,
2539  ConstantInt::get(I32Ty, I));
2540  } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2541  Name.startswith("sse41.pmovzx") ||
2542  Name.startswith("avx2.pmovsx") ||
2543  Name.startswith("avx2.pmovzx") ||
2544  Name.startswith("avx512.mask.pmovsx") ||
2545  Name.startswith("avx512.mask.pmovzx"))) {
2546  auto *DstTy = cast<FixedVectorType>(CI->getType());
2547  unsigned NumDstElts = DstTy->getNumElements();
2548 
2549  // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2550  SmallVector<int, 8> ShuffleMask(NumDstElts);
2551  for (unsigned i = 0; i != NumDstElts; ++i)
2552  ShuffleMask[i] = i;
2553 
2554  Value *SV =
2555  Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2556 
2557  bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2558  Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2559  : Builder.CreateZExt(SV, DstTy);
2560  // If there are 3 arguments, it's a masked intrinsic so we need a select.
2561  if (CI->arg_size() == 3)
2562  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2563  CI->getArgOperand(1));
2564  } else if (Name == "avx512.mask.pmov.qd.256" ||
2565  Name == "avx512.mask.pmov.qd.512" ||
2566  Name == "avx512.mask.pmov.wb.256" ||
2567  Name == "avx512.mask.pmov.wb.512") {
2568  Type *Ty = CI->getArgOperand(1)->getType();
2569  Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2570  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2571  CI->getArgOperand(1));
2572  } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2573  Name == "avx2.vbroadcasti128")) {
2574  // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2575  Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2576  unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2577  auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2578  Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2580  Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2581  if (NumSrcElts == 2)
2582  Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2583  else
2584  Rep = Builder.CreateShuffleVector(
2585  Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2586  } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2587  Name.startswith("avx512.mask.shuf.f"))) {
2588  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2589  Type *VT = CI->getType();
2590  unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2591  unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2592  unsigned ControlBitsMask = NumLanes - 1;
2593  unsigned NumControlBits = NumLanes / 2;
2594  SmallVector<int, 8> ShuffleMask(0);
2595 
2596  for (unsigned l = 0; l != NumLanes; ++l) {
2597  unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2598  // We actually need the other source.
2599  if (l >= NumLanes / 2)
2600  LaneMask += NumLanes;
2601  for (unsigned i = 0; i != NumElementsInLane; ++i)
2602  ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2603  }
2604  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2605  CI->getArgOperand(1), ShuffleMask);
2606  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2607  CI->getArgOperand(3));
2608  }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2609  Name.startswith("avx512.mask.broadcasti"))) {
2610  unsigned NumSrcElts =
2611  cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2612  ->getNumElements();
2613  unsigned NumDstElts =
2614  cast<FixedVectorType>(CI->getType())->getNumElements();
2615 
2616  SmallVector<int, 8> ShuffleMask(NumDstElts);
2617  for (unsigned i = 0; i != NumDstElts; ++i)
2618  ShuffleMask[i] = i % NumSrcElts;
2619 
2620  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2621  CI->getArgOperand(0),
2622  ShuffleMask);
2623  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2624  CI->getArgOperand(1));
2625  } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2626  Name.startswith("avx2.vbroadcast") ||
2627  Name.startswith("avx512.pbroadcast") ||
2628  Name.startswith("avx512.mask.broadcast.s"))) {
2629  // Replace vp?broadcasts with a vector shuffle.
2630  Value *Op = CI->getArgOperand(0);
2631  ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2632  Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2635  Rep = Builder.CreateShuffleVector(Op, M);
2636 
2637  if (CI->arg_size() == 3)
2638  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2639  CI->getArgOperand(1));
2640  } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2641  Name.startswith("avx2.padds.") ||
2642  Name.startswith("avx512.padds.") ||
2643  Name.startswith("avx512.mask.padds."))) {
2644  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2645  } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
2646  Name.startswith("avx2.psubs.") ||
2647  Name.startswith("avx512.psubs.") ||
2648  Name.startswith("avx512.mask.psubs."))) {
2649  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2650  } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2651  Name.startswith("avx2.paddus.") ||
2652  Name.startswith("avx512.mask.paddus."))) {
2653  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2654  } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
2655  Name.startswith("avx2.psubus.") ||
2656  Name.startswith("avx512.mask.psubus."))) {
2657  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2658  } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2660  CI->getArgOperand(1),
2661  CI->getArgOperand(2),
2662  CI->getArgOperand(3),
2663  CI->getArgOperand(4),
2664  false);
2665  } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2667  CI->getArgOperand(1),
2668  CI->getArgOperand(2),
2669  CI->getArgOperand(3),
2670  CI->getArgOperand(4),
2671  true);
2672  } else if (IsX86 && (Name == "sse2.psll.dq" ||
2673  Name == "avx2.psll.dq")) {
2674  // 128/256-bit shift left specified in bits.
2675  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2677  Shift / 8); // Shift is in bits.
2678  } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2679  Name == "avx2.psrl.dq")) {
2680  // 128/256-bit shift right specified in bits.
2681  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2683  Shift / 8); // Shift is in bits.
2684  } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2685  Name == "avx2.psll.dq.bs" ||
2686  Name == "avx512.psll.dq.512")) {
2687  // 128/256/512-bit shift left specified in bytes.
2688  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2690  } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2691  Name == "avx2.psrl.dq.bs" ||
2692  Name == "avx512.psrl.dq.512")) {
2693  // 128/256/512-bit shift right specified in bytes.
2694  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2696  } else if (IsX86 && (Name == "sse41.pblendw" ||
2697  Name.startswith("sse41.blendp") ||
2698  Name.startswith("avx.blend.p") ||
2699  Name == "avx2.pblendw" ||
2700  Name.startswith("avx2.pblendd."))) {
2701  Value *Op0 = CI->getArgOperand(0);
2702  Value *Op1 = CI->getArgOperand(1);
2703  unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2704  auto *VecTy = cast<FixedVectorType>(CI->getType());
2705  unsigned NumElts = VecTy->getNumElements();
2706 
2707  SmallVector<int, 16> Idxs(NumElts);
2708  for (unsigned i = 0; i != NumElts; ++i)
2709  Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2710 
2711  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2712  } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2713  Name == "avx2.vinserti128" ||
2714  Name.startswith("avx512.mask.insert"))) {
2715  Value *Op0 = CI->getArgOperand(0);
2716  Value *Op1 = CI->getArgOperand(1);
2717  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2718  unsigned DstNumElts =
2719  cast<FixedVectorType>(CI->getType())->getNumElements();
2720  unsigned SrcNumElts =
2721  cast<FixedVectorType>(Op1->getType())->getNumElements();
2722  unsigned Scale = DstNumElts / SrcNumElts;
2723 
2724  // Mask off the high bits of the immediate value; hardware ignores those.
2725  Imm = Imm % Scale;
2726 
2727  // Extend the second operand into a vector the size of the destination.
2728  SmallVector<int, 8> Idxs(DstNumElts);
2729  for (unsigned i = 0; i != SrcNumElts; ++i)
2730  Idxs[i] = i;
2731  for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2732  Idxs[i] = SrcNumElts;
2733  Rep = Builder.CreateShuffleVector(Op1, Idxs);
2734 
2735  // Insert the second operand into the first operand.
2736 
2737  // Note that there is no guarantee that instruction lowering will actually
2738  // produce a vinsertf128 instruction for the created shuffles. In
2739  // particular, the 0 immediate case involves no lane changes, so it can
2740  // be handled as a blend.
2741 
2742  // Example of shuffle mask for 32-bit elements:
2743  // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2744  // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2745 
2746  // First fill with identity mask.
2747  for (unsigned i = 0; i != DstNumElts; ++i)
2748  Idxs[i] = i;
2749  // Then replace the elements where we need to insert.
2750  for (unsigned i = 0; i != SrcNumElts; ++i)
2751  Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2752  Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2753 
2754  // If the intrinsic has a mask operand, handle that.
2755  if (CI->arg_size() == 5)
2756  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2757  CI->getArgOperand(3));
2758  } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2759  Name == "avx2.vextracti128" ||
2760  Name.startswith("avx512.mask.vextract"))) {
2761  Value *Op0 = CI->getArgOperand(0);
2762  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2763  unsigned DstNumElts =
2764  cast<FixedVectorType>(CI->getType())->getNumElements();
2765  unsigned SrcNumElts =
2766  cast<FixedVectorType>(Op0->getType())->getNumElements();
2767  unsigned Scale = SrcNumElts / DstNumElts;
2768 
2769  // Mask off the high bits of the immediate value; hardware ignores those.
2770  Imm = Imm % Scale;
2771 
2772  // Get indexes for the subvector of the input vector.
2773  SmallVector<int, 8> Idxs(DstNumElts);
2774  for (unsigned i = 0; i != DstNumElts; ++i) {
2775  Idxs[i] = i + (Imm * DstNumElts);
2776  }
2777  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2778 
2779  // If the intrinsic has a mask operand, handle that.
2780  if (CI->arg_size() == 4)
2781  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2782  CI->getArgOperand(2));
2783  } else if (!IsX86 && Name == "stackprotectorcheck") {
2784  Rep = nullptr;
2785  } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2786  Name.startswith("avx512.mask.perm.di."))) {
2787  Value *Op0 = CI->getArgOperand(0);
2788  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2789  auto *VecTy = cast<FixedVectorType>(CI->getType());
2790  unsigned NumElts = VecTy->getNumElements();
2791 
2792  SmallVector<int, 8> Idxs(NumElts);
2793  for (unsigned i = 0; i != NumElts; ++i)
2794  Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2795 
2796  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2797 
2798  if (CI->arg_size() == 4)
2799  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2800  CI->getArgOperand(2));
2801  } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2802  Name == "avx2.vperm2i128")) {
2803  // The immediate permute control byte looks like this:
2804  // [1:0] - select 128 bits from sources for low half of destination
2805  // [2] - ignore
2806  // [3] - zero low half of destination
2807  // [5:4] - select 128 bits from sources for high half of destination
2808  // [6] - ignore
2809  // [7] - zero high half of destination
2810 
2811  uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2812 
2813  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2814  unsigned HalfSize = NumElts / 2;
2815  SmallVector<int, 8> ShuffleMask(NumElts);
2816 
2817  // Determine which operand(s) are actually in use for this instruction.
2818  Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2819  Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2820 
2821  // If needed, replace operands based on zero mask.
2822  V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2823  V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2824 
2825  // Permute low half of result.
2826  unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2827  for (unsigned i = 0; i < HalfSize; ++i)
2828  ShuffleMask[i] = StartIndex + i;
2829 
2830  // Permute high half of result.
2831  StartIndex = (Imm & 0x10) ? HalfSize : 0;
2832  for (unsigned i = 0; i < HalfSize; ++i)
2833  ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2834 
2835  Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2836 
2837  } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2838  Name == "sse2.pshuf.d" ||
2839  Name.startswith("avx512.mask.vpermil.p") ||
2840  Name.startswith("avx512.mask.pshuf.d."))) {
2841  Value *Op0 = CI->getArgOperand(0);
2842  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2843  auto *VecTy = cast<FixedVectorType>(CI->getType());
2844  unsigned NumElts = VecTy->getNumElements();
2845  // Calculate the size of each index in the immediate.
2846  unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2847  unsigned IdxMask = ((1 << IdxSize) - 1);
2848 
2849  SmallVector<int, 8> Idxs(NumElts);
2850  // Lookup the bits for this element, wrapping around the immediate every
2851  // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2852  // to offset by the first index of each group.
2853  for (unsigned i = 0; i != NumElts; ++i)
2854  Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2855 
2856  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2857 
2858  if (CI->arg_size() == 4)
2859  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2860  CI->getArgOperand(2));
2861  } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2862  Name.startswith("avx512.mask.pshufl.w."))) {
2863  Value *Op0 = CI->getArgOperand(0);
2864  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2865  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2866 
2867  SmallVector<int, 16> Idxs(NumElts);
2868  for (unsigned l = 0; l != NumElts; l += 8) {
2869  for (unsigned i = 0; i != 4; ++i)
2870  Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2871  for (unsigned i = 4; i != 8; ++i)
2872  Idxs[i + l] = i + l;
2873  }
2874 
2875  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2876 
2877  if (CI->arg_size() == 4)
2878  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2879  CI->getArgOperand(2));
2880  } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2881  Name.startswith("avx512.mask.pshufh.w."))) {
2882  Value *Op0 = CI->getArgOperand(0);
2883  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2884  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2885 
2886  SmallVector<int, 16> Idxs(NumElts);
2887  for (unsigned l = 0; l != NumElts; l += 8) {
2888  for (unsigned i = 0; i != 4; ++i)
2889  Idxs[i + l] = i + l;
2890  for (unsigned i = 0; i != 4; ++i)
2891  Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2892  }
2893 
2894  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2895 
2896  if (CI->arg_size() == 4)
2897  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2898  CI->getArgOperand(2));
2899  } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2900  Value *Op0 = CI->getArgOperand(0);
2901  Value *Op1 = CI->getArgOperand(1);
2902  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2903  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2904 
2905  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2906  unsigned HalfLaneElts = NumLaneElts / 2;
2907 
2908  SmallVector<int, 16> Idxs(NumElts);
2909  for (unsigned i = 0; i != NumElts; ++i) {
2910  // Base index is the starting element of the lane.
2911  Idxs[i] = i - (i % NumLaneElts);
2912  // If we are halfway through the lane, switch to the other source.
2913  if ((i % NumLaneElts) >= HalfLaneElts)
2914  Idxs[i] += NumElts;
2915  // Now select the specific element by adding HalfLaneElts bits from
2916  // the immediate, wrapping around the immediate every 8 bits.
2917  Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2918  }
2919 
2920  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2921 
2922  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2923  CI->getArgOperand(3));
2924  } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2925  Name.startswith("avx512.mask.movshdup") ||
2926  Name.startswith("avx512.mask.movsldup"))) {
2927  Value *Op0 = CI->getArgOperand(0);
2928  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2929  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2930 
2931  unsigned Offset = 0;
2932  if (Name.startswith("avx512.mask.movshdup."))
2933  Offset = 1;
2934 
2935  SmallVector<int, 16> Idxs(NumElts);
2936  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2937  for (unsigned i = 0; i != NumLaneElts; i += 2) {
2938  Idxs[i + l + 0] = i + l + Offset;
2939  Idxs[i + l + 1] = i + l + Offset;
2940  }
2941 
2942  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2943 
2944  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2945  CI->getArgOperand(1));
2946  } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2947  Name.startswith("avx512.mask.unpckl."))) {
2948  Value *Op0 = CI->getArgOperand(0);
2949  Value *Op1 = CI->getArgOperand(1);
2950  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2951  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2952 
2953  SmallVector<int, 64> Idxs(NumElts);
2954  for (int l = 0; l != NumElts; l += NumLaneElts)
2955  for (int i = 0; i != NumLaneElts; ++i)
2956  Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2957 
2958  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2959 
2960  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2961  CI->getArgOperand(2));
2962  } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2963  Name.startswith("avx512.mask.unpckh."))) {
2964  Value *Op0 = CI->getArgOperand(0);
2965  Value *Op1 = CI->getArgOperand(1);
2966  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2967  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2968 
2969  SmallVector<int, 64> Idxs(NumElts);
2970  for (int l = 0; l != NumElts; l += NumLaneElts)
2971  for (int i = 0; i != NumLaneElts; ++i)
2972  Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2973 
2974  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2975 
2976  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2977  CI->getArgOperand(2));
2978  } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2979  Name.startswith("avx512.mask.pand."))) {
2980  VectorType *FTy = cast<VectorType>(CI->getType());
2981  VectorType *ITy = VectorType::getInteger(FTy);
2982  Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2983  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2984  Rep = Builder.CreateBitCast(Rep, FTy);
2985  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2986  CI->getArgOperand(2));
2987  } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2988  Name.startswith("avx512.mask.pandn."))) {
2989  VectorType *FTy = cast<VectorType>(CI->getType());
2990  VectorType *ITy = VectorType::getInteger(FTy);
2991  Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2992  Rep = Builder.CreateAnd(Rep,
2993  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2994  Rep = Builder.CreateBitCast(Rep, FTy);
2995  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2996  CI->getArgOperand(2));
2997  } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2998  Name.startswith("avx512.mask.por."))) {
2999  VectorType *FTy = cast<VectorType>(CI->getType());
3000  VectorType *ITy = VectorType::getInteger(FTy);
3001  Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3002  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3003  Rep = Builder.CreateBitCast(Rep, FTy);
3004  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3005  CI->getArgOperand(2));
3006  } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
3007  Name.startswith("avx512.mask.pxor."))) {
3008  VectorType *FTy = cast<VectorType>(CI->getType());
3009  VectorType *ITy = VectorType::getInteger(FTy);
3010  Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3011  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3012  Rep = Builder.CreateBitCast(Rep, FTy);
3013  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3014  CI->getArgOperand(2));
3015  } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
3016  Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3017  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3018  CI->getArgOperand(2));
3019  } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
3020  Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3021  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3022  CI->getArgOperand(2));
3023  } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
3024  Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3025  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3026  CI->getArgOperand(2));
3027  } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
3028  if (Name.endswith(".512")) {
3029  Intrinsic::ID IID;
3030  if (Name[17] == 's')
3031  IID = Intrinsic::x86_avx512_add_ps_512;
3032  else
3033  IID = Intrinsic::x86_avx512_add_pd_512;
3034 
3035  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3036  { CI->getArgOperand(0), CI->getArgOperand(1),
3037  CI->getArgOperand(4) });
3038  } else {
3039  Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3040  }
3041  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3042  CI->getArgOperand(2));
3043  } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
3044  if (Name.endswith(".512")) {
3045  Intrinsic::ID IID;
3046  if (Name[17] == 's')
3047  IID = Intrinsic::x86_avx512_div_ps_512;
3048  else
3049  IID = Intrinsic::x86_avx512_div_pd_512;
3050 
3051  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3052  { CI->getArgOperand(0), CI->getArgOperand(1),
3053  CI->getArgOperand(4) });
3054  } else {
3055  Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3056  }
3057  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3058  CI->getArgOperand(2));
3059  } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
3060  if (Name.endswith(".512")) {
3061  Intrinsic::ID IID;
3062  if (Name[17] == 's')
3063  IID = Intrinsic::x86_avx512_mul_ps_512;
3064  else
3065  IID = Intrinsic::x86_avx512_mul_pd_512;
3066 
3067  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3068  { CI->getArgOperand(0), CI->getArgOperand(1),
3069  CI->getArgOperand(4) });
3070  } else {
3071  Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3072  }
3073  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3074  CI->getArgOperand(2));
3075  } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
3076  if (Name.endswith(".512")) {
3077  Intrinsic::ID IID;
3078  if (Name[17] == 's')
3079  IID = Intrinsic::x86_avx512_sub_ps_512;
3080  else
3081  IID = Intrinsic::x86_avx512_sub_pd_512;
3082 
3083  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3084  { CI->getArgOperand(0), CI->getArgOperand(1),
3085  CI->getArgOperand(4) });
3086  } else {
3087  Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3088  }
3089  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3090  CI->getArgOperand(2));
3091  } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
3092  Name.startswith("avx512.mask.min.p")) &&
3093  Name.drop_front(18) == ".512") {
3094  bool IsDouble = Name[17] == 'd';
3095  bool IsMin = Name[13] == 'i';
3096  static const Intrinsic::ID MinMaxTbl[2][2] = {
3097  { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
3098  { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
3099  };
3100  Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3101 
3102  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3103  { CI->getArgOperand(0), CI->getArgOperand(1),
3104  CI->getArgOperand(4) });
3105  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3106  CI->getArgOperand(2));
3107  } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
3108  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
3109  Intrinsic::ctlz,
3110  CI->getType()),
3111  { CI->getArgOperand(0), Builder.getInt1(false) });
3112  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3113  CI->getArgOperand(1));
3114  } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
3115  bool IsImmediate = Name[16] == 'i' ||
3116  (Name.size() > 18 && Name[18] == 'i');
3117  bool IsVariable = Name[16] == 'v';
3118  char Size = Name[16] == '.' ? Name[17] :
3119  Name[17] == '.' ? Name[18] :
3120  Name[18] == '.' ? Name[19] :
3121  Name[20];
3122 
3123  Intrinsic::ID IID;
3124  if (IsVariable && Name[17] != '.') {
3125  if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3126  IID = Intrinsic::x86_avx2_psllv_q;
3127  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3128  IID = Intrinsic::x86_avx2_psllv_q_256;
3129  else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3130  IID = Intrinsic::x86_avx2_psllv_d;
3131  else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3132  IID = Intrinsic::x86_avx2_psllv_d_256;
3133  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3134  IID = Intrinsic::x86_avx512_psllv_w_128;
3135  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3136  IID = Intrinsic::x86_avx512_psllv_w_256;
3137  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3138  IID = Intrinsic::x86_avx512_psllv_w_512;
3139  else
3140  llvm_unreachable("Unexpected size");
3141  } else if (Name.endswith(".128")) {
3142  if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3143  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3144  : Intrinsic::x86_sse2_psll_d;
3145  else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3146  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3147  : Intrinsic::x86_sse2_psll_q;
3148  else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3149  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3150  : Intrinsic::x86_sse2_psll_w;
3151  else
3152  llvm_unreachable("Unexpected size");
3153  } else if (Name.endswith(".256")) {
3154  if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3155  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3156  : Intrinsic::x86_avx2_psll_d;
3157  else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3158  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3159  : Intrinsic::x86_avx2_psll_q;
3160  else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3161  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3162  : Intrinsic::x86_avx2_psll_w;
3163  else
3164  llvm_unreachable("Unexpected size");
3165  } else {
3166  if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3167  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3168  IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
3169  Intrinsic::x86_avx512_psll_d_512;
3170  else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3171  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3172  IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
3173  Intrinsic::x86_avx512_psll_q_512;
3174  else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3175  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3176  : Intrinsic::x86_avx512_psll_w_512;
3177  else
3178  llvm_unreachable("Unexpected size");
3179  }
3180 
3181  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3182  } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
3183  bool IsImmediate = Name[16] == 'i' ||
3184  (Name.size() > 18 && Name[18] == 'i');
3185  bool IsVariable = Name[16] == 'v';
3186  char Size = Name[16] == '.' ? Name[17] :
3187  Name[17] == '.' ? Name[18] :
3188  Name[18] == '.' ? Name[19] :
3189  Name[20];
3190 
3191  Intrinsic::ID IID;
3192  if (IsVariable && Name[17] != '.') {
3193  if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3194  IID = Intrinsic::x86_avx2_psrlv_q;
3195  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3196  IID = Intrinsic::x86_avx2_psrlv_q_256;
3197  else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3198  IID = Intrinsic::x86_avx2_psrlv_d;
3199  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3200  IID = Intrinsic::x86_avx2_psrlv_d_256;
3201  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3202  IID = Intrinsic::x86_avx512_psrlv_w_128;
3203  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3204  IID = Intrinsic::x86_avx512_psrlv_w_256;
3205  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3206  IID = Intrinsic::x86_avx512_psrlv_w_512;
3207  else
3208  llvm_unreachable("Unexpected size");
3209  } else if (Name.endswith(".128")) {
3210  if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3211  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3212  : Intrinsic::x86_sse2_psrl_d;
3213  else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3214  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3215  : Intrinsic::x86_sse2_psrl_q;
3216  else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3217  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3218  : Intrinsic::x86_sse2_psrl_w;
3219  else
3220  llvm_unreachable("Unexpected size");
3221  } else if (Name.endswith(".256")) {
3222  if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3223  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3224  : Intrinsic::x86_avx2_psrl_d;
3225  else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3226  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3227  : Intrinsic::x86_avx2_psrl_q;
3228  else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3229  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3230  : Intrinsic::x86_avx2_psrl_w;
3231  else
3232  llvm_unreachable("Unexpected size");
3233  } else {
3234  if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3235  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3236  IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
3237  Intrinsic::x86_avx512_psrl_d_512;
3238  else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3239  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3240  IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
3241  Intrinsic::x86_avx512_psrl_q_512;
3242  else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3243  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3244  : Intrinsic::x86_avx512_psrl_w_512;
3245  else
3246  llvm_unreachable("Unexpected size");
3247  }
3248 
3249  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3250  } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
3251  bool IsImmediate = Name[16] == 'i' ||
3252  (Name.size() > 18 && Name[18] == 'i');
3253  bool IsVariable = Name[16] == 'v';
3254  char Size = Name[16] == '.' ? Name[17] :
3255  Name[17] == '.' ? Name[18] :
3256  Name[18] == '.' ? Name[19] :
3257  Name[20];
3258 
3259  Intrinsic::ID IID;
3260  if (IsVariable && Name[17] != '.') {
3261  if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3262  IID = Intrinsic::x86_avx2_psrav_d;
3263  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3264  IID = Intrinsic::x86_avx2_psrav_d_256;
3265  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3266  IID = Intrinsic::x86_avx512_psrav_w_128;
3267  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3268  IID = Intrinsic::x86_avx512_psrav_w_256;
3269  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3270  IID = Intrinsic::x86_avx512_psrav_w_512;
3271  else
3272  llvm_unreachable("Unexpected size");
3273  } else if (Name.endswith(".128")) {
3274  if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3275  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3276  : Intrinsic::x86_sse2_psra_d;
3277  else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3278  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3279  IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
3280  Intrinsic::x86_avx512_psra_q_128;
3281  else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3282  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3283  : Intrinsic::x86_sse2_psra_w;
3284  else
3285  llvm_unreachable("Unexpected size");
3286  } else if (Name.endswith(".256")) {
3287  if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3288  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3289  : Intrinsic::x86_avx2_psra_d;
3290  else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3291  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3292  IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
3293  Intrinsic::x86_avx512_psra_q_256;
3294  else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3295  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3296  : Intrinsic::x86_avx2_psra_w;
3297  else
3298  llvm_unreachable("Unexpected size");
3299  } else {
3300  if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3301  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3302  IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
3303  Intrinsic::x86_avx512_psra_d_512;
3304  else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3305  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3306  IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
3307  Intrinsic::x86_avx512_psra_q_512;
3308  else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3309  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3310  : Intrinsic::x86_avx512_psra_w_512;
3311  else
3312  llvm_unreachable("Unexpected size");
3313  }
3314 
3315  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3316  } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3317  Rep = upgradeMaskedMove(Builder, *CI);
3318  } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3319  Rep = UpgradeMaskToInt(Builder, *CI);
3320  } else if (IsX86 && Name.endswith(".movntdqa")) {
3321  Module *M = F->getParent();
3322  MDNode *Node = MDNode::get(
3324 
3325  Value *Ptr = CI->getArgOperand(0);
3326 
3327  // Convert the type of the pointer to a pointer to the stored type.
3328  Value *BC = Builder.CreateBitCast(
3329  Ptr, PointerType::getUnqual(CI->getType()), "cast");
3330  LoadInst *LI = Builder.CreateAlignedLoad(
3331  CI->getType(), BC,
3333  LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3334  Rep = LI;
3335  } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3336  Name.startswith("fma.vfmsub.") ||
3337  Name.startswith("fma.vfnmadd.") ||
3338  Name.startswith("fma.vfnmsub."))) {
3339  bool NegMul = Name[6] == 'n';
3340  bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3341  bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3342 
3343  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3344  CI->getArgOperand(2) };
3345 
3346  if (IsScalar) {
3347  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3348  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3349  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3350  }
3351 
3352  if (NegMul && !IsScalar)
3353  Ops[0] = Builder.CreateFNeg(Ops[0]);
3354  if (NegMul && IsScalar)
3355  Ops[1] = Builder.CreateFNeg(Ops[1]);
3356  if (NegAcc)
3357  Ops[2] = Builder.CreateFNeg(Ops[2]);
3358 
3359  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3360  Intrinsic::fma,
3361  Ops[0]->getType()),
3362  Ops);
3363 
3364  if (IsScalar)
3365  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3366  (uint64_t)0);
3367  } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3368  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3369  CI->getArgOperand(2) };
3370 
3371  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3372  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3373  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3374 
3375  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3376  Intrinsic::fma,
3377  Ops[0]->getType()),
3378  Ops);
3379 
3380  Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3381  Rep, (uint64_t)0);
3382  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3383  Name.startswith("avx512.maskz.vfmadd.s") ||
3384  Name.startswith("avx512.mask3.vfmadd.s") ||
3385  Name.startswith("avx512.mask3.vfmsub.s") ||
3386  Name.startswith("avx512.mask3.vfnmsub.s"))) {
3387  bool IsMask3 = Name[11] == '3';
3388  bool IsMaskZ = Name[11] == 'z';
3389  // Drop the "avx512.mask." to make it easier.
3390  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3391  bool NegMul = Name[2] == 'n';
3392  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3393 
3394  Value *A = CI->getArgOperand(0);
3395  Value *B = CI->getArgOperand(1);
3396  Value *C = CI->getArgOperand(2);
3397 
3398  if (NegMul && (IsMask3 || IsMaskZ))
3399  A = Builder.CreateFNeg(A);
3400  if (NegMul && !(IsMask3 || IsMaskZ))
3401  B = Builder.CreateFNeg(B);
3402  if (NegAcc)
3403  C = Builder.CreateFNeg(C);
3404 
3405  A = Builder.CreateExtractElement(A, (uint64_t)0);
3406  B = Builder.CreateExtractElement(B, (uint64_t)0);
3407  C = Builder.CreateExtractElement(C, (uint64_t)0);
3408 
3409  if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3410  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3411  Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3412 
3413  Intrinsic::ID IID;
3414  if (Name.back() == 'd')
3415  IID = Intrinsic::x86_avx512_vfmadd_f64;
3416  else
3417  IID = Intrinsic::x86_avx512_vfmadd_f32;
3419  Rep = Builder.CreateCall(FMA, Ops);
3420  } else {
3422  Intrinsic::fma,
3423  A->getType());
3424  Rep = Builder.CreateCall(FMA, { A, B, C });
3425  }
3426 
3427  Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3428  IsMask3 ? C : A;
3429 
3430  // For Mask3 with NegAcc, we need to create a new extractelement that
3431  // avoids the negation above.
3432  if (NegAcc && IsMask3)
3433  PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3434  (uint64_t)0);
3435 
3437  Rep, PassThru);
3438  Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3439  Rep, (uint64_t)0);
3440  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3441  Name.startswith("avx512.mask.vfnmadd.p") ||
3442  Name.startswith("avx512.mask.vfnmsub.p") ||
3443  Name.startswith("avx512.mask3.vfmadd.p") ||
3444  Name.startswith("avx512.mask3.vfmsub.p") ||
3445  Name.startswith("avx512.mask3.vfnmsub.p") ||
3446  Name.startswith("avx512.maskz.vfmadd.p"))) {
3447  bool IsMask3 = Name[11] == '3';
3448  bool IsMaskZ = Name[11] == 'z';
3449  // Drop the "avx512.mask." to make it easier.
3450  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3451  bool NegMul = Name[2] == 'n';
3452  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3453 
3454  Value *A = CI->getArgOperand(0);
3455  Value *B = CI->getArgOperand(1);
3456  Value *C = CI->getArgOperand(2);
3457 
3458  if (NegMul && (IsMask3 || IsMaskZ))
3459  A = Builder.CreateFNeg(A);
3460  if (NegMul && !(IsMask3 || IsMaskZ))
3461  B = Builder.CreateFNeg(B);
3462  if (NegAcc)
3463  C = Builder.CreateFNeg(C);
3464 
3465  if (CI->arg_size() == 5 &&
3466  (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3467  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3468  Intrinsic::ID IID;
3469  // Check the character before ".512" in string.
3470  if (Name[Name.size()-5] == 's')
3471  IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3472  else
3473  IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3474 
3475  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3476  { A, B, C, CI->getArgOperand(4) });
3477  } else {
3479  Intrinsic::fma,
3480  A->getType());
3481  Rep = Builder.CreateCall(FMA, { A, B, C });
3482  }
3483 
3484  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3485  IsMask3 ? CI->getArgOperand(2) :
3486  CI->getArgOperand(0);
3487 
3488  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3489  } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
3490  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3491  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3492  Intrinsic::ID IID;
3493  if (VecWidth == 128 && EltWidth == 32)
3494  IID = Intrinsic::x86_fma_vfmaddsub_ps;
3495  else if (VecWidth == 256 && EltWidth == 32)
3496  IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3497  else if (VecWidth == 128 && EltWidth == 64)
3498  IID = Intrinsic::x86_fma_vfmaddsub_pd;
3499  else if (VecWidth == 256 && EltWidth == 64)
3500  IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3501  else
3502  llvm_unreachable("Unexpected intrinsic");
3503 
3504  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3505  CI->getArgOperand(2) };
3506  Ops[2] = Builder.CreateFNeg(Ops[2]);
3507  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3508  Ops);
3509  } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3510  Name.startswith("avx512.mask3.vfmaddsub.p") ||
3511  Name.startswith("avx512.maskz.vfmaddsub.p") ||
3512  Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3513  bool IsMask3 = Name[11] == '3';
3514  bool IsMaskZ = Name[11] == 'z';
3515  // Drop the "avx512.mask." to make it easier.
3516  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3517  bool IsSubAdd = Name[3] == 's';
3518  if (CI->arg_size() == 5) {
3519  Intrinsic::ID IID;
3520  // Check the character before ".512" in string.
3521  if (Name[Name.size()-5] == 's')
3522  IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3523  else
3524  IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3525 
3526  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3527  CI->getArgOperand(2), CI->getArgOperand(4) };
3528  if (IsSubAdd)
3529  Ops[2] = Builder.CreateFNeg(Ops[2]);
3530 
3531  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3532  Ops);
3533  } else {
3534  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3535 
3536  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3537  CI->getArgOperand(2) };
3538 
3539  Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3540  Ops[0]->getType());
3541  Value *Odd = Builder.CreateCall(FMA, Ops);
3542  Ops[2] = Builder.CreateFNeg(Ops[2]);
3543  Value *Even = Builder.CreateCall(FMA, Ops);
3544 
3545  if (IsSubAdd)
3546  std::swap(Even, Odd);
3547 
3548  SmallVector<int, 32> Idxs(NumElts);
3549  for (int i = 0; i != NumElts; ++i)
3550  Idxs[i] = i + (i % 2) * NumElts;
3551 
3552  Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3553  }
3554 
3555  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3556  IsMask3 ? CI->getArgOperand(2) :
3557  CI->getArgOperand(0);
3558 
3559  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3560  } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3561  Name.startswith("avx512.maskz.pternlog."))) {
3562  bool ZeroMask = Name[11] == 'z';
3563  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3564  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3565  Intrinsic::ID IID;
3566  if (VecWidth == 128 && EltWidth == 32)
3567  IID = Intrinsic::x86_avx512_pternlog_d_128;
3568  else if (VecWidth == 256 && EltWidth == 32)
3569  IID = Intrinsic::x86_avx512_pternlog_d_256;
3570  else if (VecWidth == 512 && EltWidth == 32)
3571  IID = Intrinsic::x86_avx512_pternlog_d_512;
3572  else if (VecWidth == 128 && EltWidth == 64)
3573  IID = Intrinsic::x86_avx512_pternlog_q_128;
3574  else if (VecWidth == 256 && EltWidth == 64)
3575  IID = Intrinsic::x86_avx512_pternlog_q_256;
3576  else if (VecWidth == 512 && EltWidth == 64)
3577  IID = Intrinsic::x86_avx512_pternlog_q_512;
3578  else
3579  llvm_unreachable("Unexpected intrinsic");
3580 
3581  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3582  CI->getArgOperand(2), CI->getArgOperand(3) };
3583  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3584  Args);
3585  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3586  : CI->getArgOperand(0);
3587  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3588  } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3589  Name.startswith("avx512.maskz.vpmadd52"))) {
3590  bool ZeroMask = Name[11] == 'z';
3591  bool High = Name[20] == 'h' || Name[21] == 'h';
3592  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3593  Intrinsic::ID IID;
3594  if (VecWidth == 128 && !High)
3595  IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3596  else if (VecWidth == 256 && !High)
3597  IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3598  else if (VecWidth == 512 && !High)
3599  IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3600  else if (VecWidth == 128 && High)
3601  IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3602  else if (VecWidth == 256 && High)
3603  IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3604  else if (VecWidth == 512 && High)
3605  IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3606  else
3607  llvm_unreachable("Unexpected intrinsic");
3608 
3609  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3610  CI->getArgOperand(2) };
3611  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3612  Args);
3613  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3614  : CI->getArgOperand(0);
3615  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3616  } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3617  Name.startswith("avx512.mask.vpermt2var.") ||
3618  Name.startswith("avx512.maskz.vpermt2var."))) {
3619  bool ZeroMask = Name[11] == 'z';
3620  bool IndexForm = Name[17] == 'i';
3621  Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3622  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3623  Name.startswith("avx512.maskz.vpdpbusd.") ||
3624  Name.startswith("avx512.mask.vpdpbusds.") ||
3625  Name.startswith("avx512.maskz.vpdpbusds."))) {
3626  bool ZeroMask = Name[11] == 'z';
3627  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3628  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3629  Intrinsic::ID IID;
3630  if (VecWidth == 128 && !IsSaturating)
3631  IID = Intrinsic::x86_avx512_vpdpbusd_128;
3632  else if (VecWidth == 256 && !IsSaturating)
3633  IID = Intrinsic::x86_avx512_vpdpbusd_256;
3634  else if (VecWidth == 512 && !IsSaturating)
3635  IID = Intrinsic::x86_avx512_vpdpbusd_512;
3636  else if (VecWidth == 128 && IsSaturating)
3637  IID = Intrinsic::x86_avx512_vpdpbusds_128;
3638  else if (VecWidth == 256 && IsSaturating)
3639  IID = Intrinsic::x86_avx512_vpdpbusds_256;
3640  else if (VecWidth == 512 && IsSaturating)
3641  IID = Intrinsic::x86_avx512_vpdpbusds_512;
3642  else
3643  llvm_unreachable("Unexpected intrinsic");
3644 
3645  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3646  CI->getArgOperand(2) };
3647  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3648  Args);
3649  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3650  : CI->getArgOperand(0);
3651  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3652  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3653  Name.startswith("avx512.maskz.vpdpwssd.") ||
3654  Name.startswith("avx512.mask.vpdpwssds.") ||
3655  Name.startswith("avx512.maskz.vpdpwssds."))) {
3656  bool ZeroMask = Name[11] == 'z';
3657  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3658  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3659  Intrinsic::ID IID;
3660  if (VecWidth == 128 && !IsSaturating)
3661  IID = Intrinsic::x86_avx512_vpdpwssd_128;
3662  else if (VecWidth == 256 && !IsSaturating)
3663  IID = Intrinsic::x86_avx512_vpdpwssd_256;
3664  else if (VecWidth == 512 && !IsSaturating)
3665  IID = Intrinsic::x86_avx512_vpdpwssd_512;
3666  else if (VecWidth == 128 && IsSaturating)
3667  IID = Intrinsic::x86_avx512_vpdpwssds_128;
3668  else if (VecWidth == 256 && IsSaturating)
3669  IID = Intrinsic::x86_avx512_vpdpwssds_256;
3670  else if (VecWidth == 512 && IsSaturating)
3671  IID = Intrinsic::x86_avx512_vpdpwssds_512;
3672  else
3673  llvm_unreachable("Unexpected intrinsic");
3674 
3675  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3676  CI->getArgOperand(2) };
3677  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3678  Args);
3679  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3680  : CI->getArgOperand(0);
3681  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3682  } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3683  Name == "addcarry.u32" || Name == "addcarry.u64" ||
3684  Name == "subborrow.u32" || Name == "subborrow.u64")) {
3685  Intrinsic::ID IID;
3686  if (Name[0] == 'a' && Name.back() == '2')
3687  IID = Intrinsic::x86_addcarry_32;
3688  else if (Name[0] == 'a' && Name.back() == '4')
3689  IID = Intrinsic::x86_addcarry_64;
3690  else if (Name[0] == 's' && Name.back() == '2')
3691  IID = Intrinsic::x86_subborrow_32;
3692  else if (Name[0] == 's' && Name.back() == '4')
3693  IID = Intrinsic::x86_subborrow_64;
3694  else
3695  llvm_unreachable("Unexpected intrinsic");
3696 
3697  // Make a call with 3 operands.
3698  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3699  CI->getArgOperand(2)};
3700  Value *NewCall = Builder.CreateCall(
3702  Args);
3703 
3704  // Extract the second result and store it.
3705  Value *Data = Builder.CreateExtractValue(NewCall, 1);
3706  // Cast the pointer to the right type.
3707  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3708  llvm::PointerType::getUnqual(Data->getType()));
3709  Builder.CreateAlignedStore(Data, Ptr, Align(1));
3710  // Replace the original call result with the first result of the new call.
3711  Value *CF = Builder.CreateExtractValue(NewCall, 0);
3712 
3713  CI->replaceAllUsesWith(CF);
3714  Rep = nullptr;
3715  } else if (IsX86 && Name.startswith("avx512.mask.") &&
3716  upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3717  // Rep will be updated by the call in the condition.
3718  } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3719  Value *Arg = CI->getArgOperand(0);
3720  Value *Neg = Builder.CreateNeg(Arg, "neg");
3721  Value *Cmp = Builder.CreateICmpSGE(
3722  Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3723  Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3724  } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3725  Name.startswith("atomic.load.add.f64.p"))) {
3726  Value *Ptr = CI->getArgOperand(0);
3727  Value *Val = CI->getArgOperand(1);
3728  Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
3730  } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3731  Name == "max.ui" || Name == "max.ull")) {
3732  Value *Arg0 = CI->getArgOperand(0);
3733  Value *Arg1 = CI->getArgOperand(1);
3734  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3735  ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3736  : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3737  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3738  } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3739  Name == "min.ui" || Name == "min.ull")) {
3740  Value *Arg0 = CI->getArgOperand(0);
3741  Value *Arg1 = CI->getArgOperand(1);
3742  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3743  ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3744  : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3745  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3746  } else if (IsNVVM && Name == "clz.ll") {
3747  // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3748  Value *Arg = CI->getArgOperand(0);
3749  Value *Ctlz = Builder.CreateCall(
3750  Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3751  {Arg->getType()}),
3752  {Arg, Builder.getFalse()}, "ctlz");
3753  Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3754  } else if (IsNVVM && Name == "popc.ll") {
3755  // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
3756  // i64.
3757  Value *Arg = CI->getArgOperand(0);
3758  Value *Popc = Builder.CreateCall(
3759  Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3760  {Arg->getType()}),
3761  Arg, "ctpop");
3762  Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3763  } else if (IsNVVM && Name == "h2f") {
3764  Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3765  F->getParent(), Intrinsic::convert_from_fp16,
3766  {Builder.getFloatTy()}),
3767  CI->getArgOperand(0), "h2f");
3768  } else if (IsARM) {
3769  Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
3770  } else {
3771  llvm_unreachable("Unknown function for CallInst upgrade.");
3772  }
3773 
3774  if (Rep)
3775  CI->replaceAllUsesWith(Rep);
3776  CI->eraseFromParent();
3777  return;
3778  }
3779 
3780  const auto &DefaultCase = [&NewFn, &CI]() -> void {
3781  // Handle generic mangling change, but nothing else
3782  assert(
3783  (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3784  "Unknown function for CallInst upgrade and isn't just a name change");
3785  CI->setCalledFunction(NewFn);
3786  };
3787  CallInst *NewCall = nullptr;
3788  switch (NewFn->getIntrinsicID()) {
3789  default: {
3790  DefaultCase();
3791  return;
3792  }
3793  case Intrinsic::arm_neon_vld1:
3794  case Intrinsic::arm_neon_vld2:
3795  case Intrinsic::arm_neon_vld3:
3796  case Intrinsic::arm_neon_vld4:
3797  case Intrinsic::arm_neon_vld2lane:
3798  case Intrinsic::arm_neon_vld3lane:
3799  case Intrinsic::arm_neon_vld4lane:
3800  case Intrinsic::arm_neon_vst1:
3801  case Intrinsic::arm_neon_vst2:
3802  case Intrinsic::arm_neon_vst3:
3803  case Intrinsic::arm_neon_vst4:
3804  case Intrinsic::arm_neon_vst2lane:
3805  case Intrinsic::arm_neon_vst3lane:
3806  case Intrinsic::arm_neon_vst4lane: {
3808  NewCall = Builder.CreateCall(NewFn, Args);
3809  break;
3810  }
3811 
3812  case Intrinsic::arm_neon_bfdot:
3813  case Intrinsic::arm_neon_bfmmla:
3814  case Intrinsic::arm_neon_bfmlalb:
3815  case Intrinsic::arm_neon_bfmlalt:
3816  case Intrinsic::aarch64_neon_bfdot:
3817  case Intrinsic::aarch64_neon_bfmmla:
3818  case Intrinsic::aarch64_neon_bfmlalb:
3819  case Intrinsic::aarch64_neon_bfmlalt: {
3821  assert(CI->arg_size() == 3 &&
3822  "Mismatch between function args and call args");
3823  size_t OperandWidth =
3825  assert((OperandWidth == 64 || OperandWidth == 128) &&
3826  "Unexpected operand width");
3827  Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
3828  auto Iter = CI->args().begin();
3829  Args.push_back(*Iter++);
3830  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3831  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3832  NewCall = Builder.CreateCall(NewFn, Args);
3833  break;
3834  }
3835 
3836  case Intrinsic::bitreverse:
3837  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3838  break;
3839 
3840  case Intrinsic::ctlz:
3841  case Intrinsic::cttz:
3842  assert(CI->arg_size() == 1 &&
3843  "Mismatch between function args and call args");
3844  NewCall =
3845  Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3846  break;
3847 
3848  case Intrinsic::objectsize: {
3849  Value *NullIsUnknownSize =
3850  CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
3851  Value *Dynamic =
3852  CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3853  NewCall = Builder.CreateCall(
3854  NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3855  break;
3856  }
3857 
3858  case Intrinsic::ctpop:
3859  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3860  break;
3861 
3862  case Intrinsic::convert_from_fp16:
3863  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3864  break;
3865 
3866  case Intrinsic::dbg_value:
3867  // Upgrade from the old version that had an extra offset argument.
3868  assert(CI->arg_size() == 4);
3869  // Drop nonzero offsets instead of attempting to upgrade them.
3870  if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3871  if (Offset->isZeroValue()) {
3872  NewCall = Builder.CreateCall(
3873  NewFn,
3874  {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3875  break;
3876  }
3877  CI->eraseFromParent();
3878  return;
3879 
3880  case Intrinsic::ptr_annotation:
3881  // Upgrade from versions that lacked the annotation attribute argument.
3882  assert(CI->arg_size() == 4 &&
3883  "Before LLVM 12.0 this intrinsic took four arguments");
3884  // Create a new call with an added null annotation attribute argument.
3885  NewCall = Builder.CreateCall(
3886  NewFn,
3887  {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3888  CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3889  NewCall->takeName(CI);
3890  CI->replaceAllUsesWith(NewCall);
3891  CI->eraseFromParent();
3892  return;
3893 
3894  case Intrinsic::var_annotation:
3895  // Upgrade from versions that lacked the annotation attribute argument.
3896  assert(CI->arg_size() == 4 &&
3897  "Before LLVM 12.0 this intrinsic took four arguments");
3898  // Create a new call with an added null annotation attribute argument.
3899  NewCall = Builder.CreateCall(
3900  NewFn,
3901  {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3902  CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3903  CI->eraseFromParent();
3904  return;
3905 
3906  case Intrinsic::x86_xop_vfrcz_ss:
3907  case Intrinsic::x86_xop_vfrcz_sd:
3908  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3909  break;
3910 
3911  case Intrinsic::x86_xop_vpermil2pd:
3912  case Intrinsic::x86_xop_vpermil2ps:
3913  case Intrinsic::x86_xop_vpermil2pd_256:
3914  case Intrinsic::x86_xop_vpermil2ps_256: {
3916  VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3917  VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3918  Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3919  NewCall = Builder.CreateCall(NewFn, Args);
3920  break;
3921  }
3922 
3923  case Intrinsic::x86_sse41_ptestc:
3924  case Intrinsic::x86_sse41_ptestz:
3925  case Intrinsic::x86_sse41_ptestnzc: {
3926  // The arguments for these intrinsics used to be v4f32, and changed
3927  // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3928  // So, the only thing required is a bitcast for both arguments.
3929  // First, check the arguments have the old type.
3930  Value *Arg0 = CI->getArgOperand(0);
3931  if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
3932  return;
3933 
3934  // Old intrinsic, add bitcasts
3935  Value *Arg1 = CI->getArgOperand(1);
3936 
3937  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
3938 
3939  Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3940  Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3941 
3942  NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3943  break;
3944  }
3945 
3946  case Intrinsic::x86_rdtscp: {
3947  // This used to take 1 argument. If we have no arguments, it is already
3948  // upgraded.
3949  if (CI->getNumOperands() == 0)
3950  return;
3951 
3952  NewCall = Builder.CreateCall(NewFn);
3953  // Extract the second result and store it.
3954  Value *Data = Builder.CreateExtractValue(NewCall, 1);
3955  // Cast the pointer to the right type.
3956  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3957  llvm::PointerType::getUnqual(Data->getType()));
3958  Builder.CreateAlignedStore(Data, Ptr, Align(1));
3959  // Replace the original call result with the first result of the new call.
3960  Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3961 
3962  NewCall->takeName(CI);
3963  CI->replaceAllUsesWith(TSC);
3964  CI->eraseFromParent();
3965  return;
3966  }
3967 
3968  case Intrinsic::x86_sse41_insertps:
3969  case Intrinsic::x86_sse41_dppd:
3970  case Intrinsic::x86_sse41_dpps:
3971  case Intrinsic::x86_sse41_mpsadbw:
3972  case Intrinsic::x86_avx_dp_ps_256:
3973  case Intrinsic::x86_avx2_mpsadbw: {
3974  // Need to truncate the last argument from i32 to i8 -- this argument models
3975  // an inherently 8-bit immediate operand to these x86 instructions.
3977 
3978  // Replace the last argument with a trunc.
3979  Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3980  NewCall = Builder.CreateCall(NewFn, Args);
3981  break;
3982  }
3983 
3984  case Intrinsic::x86_avx512_mask_cmp_pd_128:
3985  case Intrinsic::x86_avx512_mask_cmp_pd_256:
3986  case Intrinsic::x86_avx512_mask_cmp_pd_512:
3987  case Intrinsic::x86_avx512_mask_cmp_ps_128:
3988  case Intrinsic::x86_avx512_mask_cmp_ps_256:
3989  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
3991  unsigned NumElts =
3992  cast<FixedVectorType>(Args[0]->getType())->getNumElements();
3993  Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
3994 
3995  NewCall = Builder.CreateCall(NewFn, Args);
3996  Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
3997 
3998  NewCall->takeName(CI);
3999  CI->replaceAllUsesWith(Res);
4000  CI->eraseFromParent();
4001  return;
4002  }
4003 
4004  case Intrinsic::thread_pointer: {
4005  NewCall = Builder.CreateCall(NewFn, {});
4006  break;
4007  }
4008 
4009  case Intrinsic::invariant_start:
4010  case Intrinsic::invariant_end: {
4012  NewCall = Builder.CreateCall(NewFn, Args);
4013  break;
4014  }
4015  case Intrinsic::masked_load:
4016  case Intrinsic::masked_store:
4017  case Intrinsic::masked_gather:
4018  case Intrinsic::masked_scatter: {
4020  NewCall = Builder.CreateCall(NewFn, Args);
4021  NewCall->copyMetadata(*CI);
4022  break;
4023  }
4024 
4025  case Intrinsic::memcpy:
4026  case Intrinsic::memmove:
4027  case Intrinsic::memset: {
4028  // We have to make sure that the call signature is what we're expecting.
4029  // We only want to change the old signatures by removing the alignment arg:
4030  // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4031  // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4032  // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4033  // -> @llvm.memset...(i8*, i8, i[32|64], i1)
4034  // Note: i8*'s in the above can be any pointer type
4035  if (CI->arg_size() != 5) {
4036  DefaultCase();
4037  return;
4038  }
4039  // Remove alignment argument (3), and add alignment attributes to the
4040  // dest/src pointers.
4041  Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4042  CI->getArgOperand(2), CI->getArgOperand(4)};
4043  NewCall = Builder.CreateCall(NewFn, Args);
4044  auto *MemCI = cast<MemIntrinsic>(NewCall);
4045  // All mem intrinsics support dest alignment.
4046  const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4047  MemCI->setDestAlignment(Align->getMaybeAlignValue());
4048  // Memcpy/Memmove also support source alignment.
4049  if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4050  MTI->setSourceAlignment(Align->getMaybeAlignValue());
4051  break;
4052  }
4053  }
4054  assert(NewCall && "Should have either set this variable or returned through "
4055  "the default case");
4056  NewCall->takeName(CI);
4057  CI->replaceAllUsesWith(NewCall);
4058  CI->eraseFromParent();
4059 }
4060 
4062  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4063 
4064  // Check if this function should be upgraded and get the replacement function
4065  // if there is one.
4066  Function *NewFn;
4067  if (UpgradeIntrinsicFunction(F, NewFn)) {
4068  // Replace all users of the old function with the new function or new
4069  // instructions. This is not a range loop because the call is deleted.
4070  for (User *U : make_early_inc_range(F->users()))
4071  if (CallInst *CI = dyn_cast<CallInst>(U))
4072  UpgradeIntrinsicCall(CI, NewFn);
4073 
4074  // Remove old function, no longer used, from the module.
4075  F->eraseFromParent();
4076  }
4077 }
4078 
4080  // Check if the tag uses struct-path aware TBAA format.
4081  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
4082  return &MD;
4083 
4084  auto &Context = MD.getContext();
4085  if (MD.getNumOperands() == 3) {
4086  Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4087  MDNode *ScalarType = MDNode::get(Context, Elts);
4088  // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4089  Metadata *Elts2[] = {ScalarType, ScalarType,
4092  MD.getOperand(2)};
4093  return MDNode::get(Context, Elts2);
4094  }
4095  // Create a MDNode <MD, MD, offset 0>
4098  return MDNode::get(Context, Elts);
4099 }
4100 
4101 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4102  Instruction *&Temp) {
4103  if (Opc != Instruction::BitCast)
4104  return nullptr;
4105 
4106  Temp = nullptr;
4107  Type *SrcTy = V->getType();
4108  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4109  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4110  LLVMContext &Context = V->getContext();
4111 
4112  // We have no information about target data layout, so we assume that
4113  // the maximum pointer size is 64bit.
4114  Type *MidTy = Type::getInt64Ty(Context);
4115  Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4116 
4117  return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4118  }
4119 
4120  return nullptr;
4121 }
4122 
4123 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4124  if (Opc != Instruction::BitCast)
4125  return nullptr;
4126 
4127  Type *SrcTy = C->getType();
4128  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4129  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4130  LLVMContext &Context = C->getContext();
4131 
4132  // We have no information about target data layout, so we assume that
4133  // the maximum pointer size is 64bit.
4134  Type *MidTy = Type::getInt64Ty(Context);
4135 
4137  DestTy);
4138  }
4139 
4140  return nullptr;
4141 }
4142 
4143 /// Check the debug info version number, if it is out-dated, drop the debug
4144 /// info. Return true if module is modified.
4148  bool BrokenDebugInfo = false;
4149  if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4150  report_fatal_error("Broken module found, compilation aborted!");
4151  if (!BrokenDebugInfo)
4152  // Everything is ok.
4153  return false;
4154  else {
4155  // Diagnose malformed debug info.
4157  M.getContext().diagnose(Diag);
4158  }
4159  }
4160  bool Modified = StripDebugInfo(M);
4162  // Diagnose a version mismatch.
4164  M.getContext().diagnose(DiagVersion);
4165  }
4166  return Modified;
4167 }
4168 
4169 /// This checks for objc retain release marker which should be upgraded. It
4170 /// returns true if module is modified.
4172  bool Changed = false;
4173  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4174  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4175  if (ModRetainReleaseMarker) {
4176  MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4177  if (Op) {
4178  MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4179  if (ID) {
4180  SmallVector<StringRef, 4> ValueComp;
4181  ID->getString().split(ValueComp, "#");
4182  if (ValueComp.size() == 2) {
4183  std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4184  ID = MDString::get(M.getContext(), NewValue);
4185  }
4186  M.addModuleFlag(Module::Error, MarkerKey, ID);
4187  M.eraseNamedMetadata(ModRetainReleaseMarker);
4188  Changed = true;
4189  }
4190  }
4191  }
4192  return Changed;
4193 }
4194 
4196  // This lambda converts normal function calls to ARC runtime functions to
4197  // intrinsic calls.
4198  auto UpgradeToIntrinsic = [&](const char *OldFunc,
4199  llvm::Intrinsic::ID IntrinsicFunc) {
4200  Function *Fn = M.getFunction(OldFunc);
4201 
4202  if (!Fn)
4203  return;
4204 
4205  Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4206 
4207  for (User *U : make_early_inc_range(Fn->users())) {
4208  CallInst *CI = dyn_cast<CallInst>(U);
4209  if (!CI || CI->getCalledFunction() != Fn)
4210