LLVM  13.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DIBuilder.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/InstVisitor.h"
24 #include "llvm/IR/Instruction.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/Intrinsics.h"
27 #include "llvm/IR/IntrinsicsAArch64.h"
28 #include "llvm/IR/IntrinsicsARM.h"
29 #include "llvm/IR/IntrinsicsX86.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Verifier.h"
34 #include "llvm/Support/Regex.h"
35 #include <cstring>
36 using namespace llvm;
37 
// Move a global value aside by appending ".old" to its current name. Several
// of the upgrade helpers in this file call this on an outdated declaration
// right before asking Intrinsic::getDeclaration for its replacement.
38 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
39 
40 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
41 // changed their type from v4f32 to v2i64.
//
// F is the declaration under inspection; IID is the intrinsic that replaces it.
// Returns false and leaves F alone unless F's first parameter is a 4 x float
// fixed vector. Otherwise F gets an ".old" suffix, NewFn receives the
// declaration of IID in F's module, and the result is true.
// NOTE(review): the first line of the signature (source line 42) is not
// present in this listing.
43  Function *&NewFn) {
44  // Check whether this is an old version of the function, which received
45  // v4f32 arguments.
46  Type *Arg0Type = F->getFunctionType()->getParamType(0);
47  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
48  return false;
49 
50  // Yes, it's old, replace it with new version.
51  rename(F);
52  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
53  return true;
54 }
55 
56 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
57 // arguments have changed their type from i32 to i8.
//
// F is the declaration under inspection; IID is the intrinsic that replaces it.
// Returns false and leaves F alone when F's last parameter is not an i32.
// Otherwise F gets an ".old" suffix, NewFn receives the declaration of IID in
// F's module, and the result is true.
// NOTE(review): the first line of the signature (source line 58) is not
// present in this listing.
59  Function *&NewFn) {
60  // Check that the last argument is an i32.
  // NOTE(review): getNumParams() - 1 presumes F has at least one parameter;
  // confirm that callers only pass such declarations.
61  Type *LastArgType = F->getFunctionType()->getParamType(
62  F->getFunctionType()->getNumParams() - 1);
63  if (!LastArgType->isIntegerTy(32))
64  return false;
65 
66  // Move this function aside and map down.
67  rename(F);
68  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
69  return true;
70 }
71 
72 // Upgrade the declaration of fp compare intrinsics that change return type
73 // from scalar to vXi1 mask.
//
// F is the declaration under inspection; IID is the intrinsic that replaces it.
// Returns false and leaves F alone when F already returns a vector. Otherwise
// F gets an ".old" suffix, NewFn receives the declaration of IID in F's
// module, and the result is true.
// NOTE(review): the first line of the signature (source line 74) is not
// present in this listing.
75  Function *&NewFn) {
76  // A vector return type means there is nothing to upgrade; only a
  // non-vector (old-style) return type proceeds past this check.
77  if (F->getReturnType()->isVectorTy())
78  return false;
79 
80  rename(F);
81  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
82  return true;
83 }
84 
// Name-matching predicate: yields true when Name is equal to, or begins with,
// one of the strings tested below, and false otherwise. Each entry carries a
// trailing comment naming the LLVM release in which its auto-upgrade started.
// NOTE(review): the signature line (source line 85) is missing from this
// listing, so the function name and parameter list cannot be confirmed here.
86  // All of the intrinsic matches below should be marked with which llvm
87  // version started autoupgrading them. At some point in the future we would
88  // like to use this information to remove upgrade code for some older
89  // intrinsics. It is currently undecided how we will determine that future
90  // point.
91  if (Name == "addcarryx.u32" || // Added in 8.0
92  Name == "addcarryx.u64" || // Added in 8.0
93  Name == "addcarry.u32" || // Added in 8.0
94  Name == "addcarry.u64" || // Added in 8.0
95  Name == "subborrow.u32" || // Added in 8.0
96  Name == "subborrow.u64" || // Added in 8.0
97  Name.startswith("sse2.padds.") || // Added in 8.0
98  Name.startswith("sse2.psubs.") || // Added in 8.0
99  Name.startswith("sse2.paddus.") || // Added in 8.0
100  Name.startswith("sse2.psubus.") || // Added in 8.0
101  Name.startswith("avx2.padds.") || // Added in 8.0
102  Name.startswith("avx2.psubs.") || // Added in 8.0
103  Name.startswith("avx2.paddus.") || // Added in 8.0
104  Name.startswith("avx2.psubus.") || // Added in 8.0
105  Name.startswith("avx512.padds.") || // Added in 8.0
106  Name.startswith("avx512.psubs.") || // Added in 8.0
107  Name.startswith("avx512.mask.padds.") || // Added in 8.0
108  Name.startswith("avx512.mask.psubs.") || // Added in 8.0
109  Name.startswith("avx512.mask.paddus.") || // Added in 8.0
110  Name.startswith("avx512.mask.psubus.") || // Added in 8.0
111  Name=="ssse3.pabs.b.128" || // Added in 6.0
112  Name=="ssse3.pabs.w.128" || // Added in 6.0
113  Name=="ssse3.pabs.d.128" || // Added in 6.0
114  Name.startswith("fma4.vfmadd.s") || // Added in 7.0
115  Name.startswith("fma.vfmadd.") || // Added in 7.0
116  Name.startswith("fma.vfmsub.") || // Added in 7.0
117  Name.startswith("fma.vfmsubadd.") || // Added in 7.0
118  Name.startswith("fma.vfnmadd.") || // Added in 7.0
119  Name.startswith("fma.vfnmsub.") || // Added in 7.0
120  Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
121  Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
122  Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
123  Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
124  Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
125  Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
126  Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
127  Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
128  Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
129  Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
130  Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
131  Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
132  Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
133  Name.startswith("avx512.kunpck") || // Added in 6.0
134  Name.startswith("avx2.pabs.") || // Added in 6.0
135  Name.startswith("avx512.mask.pabs.") || // Added in 6.0
136  Name.startswith("avx512.broadcastm") || // Added in 6.0
137  Name == "sse.sqrt.ss" || // Added in 7.0
138  Name == "sse2.sqrt.sd" || // Added in 7.0
139  Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
140  Name.startswith("avx.sqrt.p") || // Added in 7.0
141  Name.startswith("sse2.sqrt.p") || // Added in 7.0
142  Name.startswith("sse.sqrt.p") || // Added in 7.0
143  Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
144  Name.startswith("sse2.pcmpeq.") || // Added in 3.1
145  Name.startswith("sse2.pcmpgt.") || // Added in 3.1
146  Name.startswith("avx2.pcmpeq.") || // Added in 3.1
147  Name.startswith("avx2.pcmpgt.") || // Added in 3.1
148  Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
149  Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
150  Name.startswith("avx.vperm2f128.") || // Added in 6.0
151  Name == "avx2.vperm2i128" || // Added in 6.0
152  Name == "sse.add.ss" || // Added in 4.0
153  Name == "sse2.add.sd" || // Added in 4.0
154  Name == "sse.sub.ss" || // Added in 4.0
155  Name == "sse2.sub.sd" || // Added in 4.0
156  Name == "sse.mul.ss" || // Added in 4.0
157  Name == "sse2.mul.sd" || // Added in 4.0
158  Name == "sse.div.ss" || // Added in 4.0
159  Name == "sse2.div.sd" || // Added in 4.0
160  Name == "sse41.pmaxsb" || // Added in 3.9
161  Name == "sse2.pmaxs.w" || // Added in 3.9
162  Name == "sse41.pmaxsd" || // Added in 3.9
163  Name == "sse2.pmaxu.b" || // Added in 3.9
164  Name == "sse41.pmaxuw" || // Added in 3.9
165  Name == "sse41.pmaxud" || // Added in 3.9
166  Name == "sse41.pminsb" || // Added in 3.9
167  Name == "sse2.pmins.w" || // Added in 3.9
168  Name == "sse41.pminsd" || // Added in 3.9
169  Name == "sse2.pminu.b" || // Added in 3.9
170  Name == "sse41.pminuw" || // Added in 3.9
171  Name == "sse41.pminud" || // Added in 3.9
172  Name == "avx512.kand.w" || // Added in 7.0
173  Name == "avx512.kandn.w" || // Added in 7.0
174  Name == "avx512.knot.w" || // Added in 7.0
175  Name == "avx512.kor.w" || // Added in 7.0
176  Name == "avx512.kxor.w" || // Added in 7.0
177  Name == "avx512.kxnor.w" || // Added in 7.0
178  Name == "avx512.kortestc.w" || // Added in 7.0
179  Name == "avx512.kortestz.w" || // Added in 7.0
180  Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
181  Name.startswith("avx2.pmax") || // Added in 3.9
182  Name.startswith("avx2.pmin") || // Added in 3.9
183  Name.startswith("avx512.mask.pmax") || // Added in 4.0
184  Name.startswith("avx512.mask.pmin") || // Added in 4.0
185  Name.startswith("avx2.vbroadcast") || // Added in 3.8
186  Name.startswith("avx2.pbroadcast") || // Added in 3.8
187  Name.startswith("avx.vpermil.") || // Added in 3.1
188  Name.startswith("sse2.pshuf") || // Added in 3.9
189  Name.startswith("avx512.pbroadcast") || // Added in 3.9
190  Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
191  Name.startswith("avx512.mask.movddup") || // Added in 3.9
192  Name.startswith("avx512.mask.movshdup") || // Added in 3.9
193  Name.startswith("avx512.mask.movsldup") || // Added in 3.9
194  Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
195  Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
196  Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
197  Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
198  Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
199  Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
200  Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
201  Name.startswith("avx512.mask.punpckl") || // Added in 3.9
202  Name.startswith("avx512.mask.punpckh") || // Added in 3.9
203  Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
204  Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
205  Name.startswith("avx512.mask.pand.") || // Added in 3.9
206  Name.startswith("avx512.mask.pandn.") || // Added in 3.9
207  Name.startswith("avx512.mask.por.") || // Added in 3.9
208  Name.startswith("avx512.mask.pxor.") || // Added in 3.9
209  Name.startswith("avx512.mask.and.") || // Added in 3.9
210  Name.startswith("avx512.mask.andn.") || // Added in 3.9
211  Name.startswith("avx512.mask.or.") || // Added in 3.9
212  Name.startswith("avx512.mask.xor.") || // Added in 3.9
213  Name.startswith("avx512.mask.padd.") || // Added in 4.0
214  Name.startswith("avx512.mask.psub.") || // Added in 4.0
215  Name.startswith("avx512.mask.pmull.") || // Added in 4.0
216  Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
217  Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
218  Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
219  Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
220  Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
221  Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
222  Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
223  Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
224  Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
225  Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
226  Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
227  Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
228  Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
229  Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
230  Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
231  Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
232  Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
233  Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
234  Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
235  Name == "avx512.cvtusi2sd" || // Added in 7.0
236  Name.startswith("avx512.mask.permvar.") || // Added in 7.0
237  Name == "sse2.pmulu.dq" || // Added in 7.0
238  Name == "sse41.pmuldq" || // Added in 7.0
239  Name == "avx2.pmulu.dq" || // Added in 7.0
240  Name == "avx2.pmul.dq" || // Added in 7.0
241  Name == "avx512.pmulu.dq.512" || // Added in 7.0
242  Name == "avx512.pmul.dq.512" || // Added in 7.0
243  Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
244  Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
245  Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
246  Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
247  Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
248  Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
249  Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
250  Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
251  Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
252  Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
253  Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
254  Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
255  Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
256  Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
257  Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
258  Name.startswith("avx512.cmp.p") || // Added in 12.0
259  Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
260  Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
261  Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
262  Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
263  Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
264  Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
265  Name.startswith("avx512.mask.psll.d") || // Added in 4.0
266  Name.startswith("avx512.mask.psll.q") || // Added in 4.0
267  Name.startswith("avx512.mask.psll.w") || // Added in 4.0
268  Name.startswith("avx512.mask.psra.d") || // Added in 4.0
269  Name.startswith("avx512.mask.psra.q") || // Added in 4.0
270  Name.startswith("avx512.mask.psra.w") || // Added in 4.0
271  Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
272  Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
273  Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
274  Name.startswith("avx512.mask.pslli") || // Added in 4.0
275  Name.startswith("avx512.mask.psrai") || // Added in 4.0
276  Name.startswith("avx512.mask.psrli") || // Added in 4.0
277  Name.startswith("avx512.mask.psllv") || // Added in 4.0
278  Name.startswith("avx512.mask.psrav") || // Added in 4.0
279  Name.startswith("avx512.mask.psrlv") || // Added in 4.0
280  Name.startswith("sse41.pmovsx") || // Added in 3.8
281  Name.startswith("sse41.pmovzx") || // Added in 3.9
282  Name.startswith("avx2.pmovsx") || // Added in 3.9
283  Name.startswith("avx2.pmovzx") || // Added in 3.9
284  Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
285  Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
286  Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
287  Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
288  Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
289  Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
290  Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
291  Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
292  Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
293  Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
294  Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
295  Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
296  Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
297  Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
298  Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
299  Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
300  Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
301  Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
302  Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
303  Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
304  Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
305  Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
306  Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
307  Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
308  Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
309  Name.startswith("avx512.vpshld.") || // Added in 8.0
310  Name.startswith("avx512.vpshrd.") || // Added in 8.0
311  Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
312  Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
313  Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
314  Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
315  Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
316  Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
317  Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
318  Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
319  Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
320  Name.startswith("avx512.mask.conflict.") || // Added in 9.0
321  Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
322  Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
323  Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
324  Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
325  Name == "sse.cvtsi2ss" || // Added in 7.0
326  Name == "sse.cvtsi642ss" || // Added in 7.0
327  Name == "sse2.cvtsi2sd" || // Added in 7.0
328  Name == "sse2.cvtsi642sd" || // Added in 7.0
329  Name == "sse2.cvtss2sd" || // Added in 7.0
330  Name == "sse2.cvtdq2pd" || // Added in 3.9
331  Name == "sse2.cvtdq2ps" || // Added in 7.0
332  Name == "sse2.cvtps2pd" || // Added in 3.9
333  Name == "avx.cvtdq2.pd.256" || // Added in 3.9
334  Name == "avx.cvtdq2.ps.256" || // Added in 7.0
335  Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
336  Name.startswith("vcvtph2ps.") || // Added in 11.0
337  Name.startswith("avx.vinsertf128.") || // Added in 3.7
338  Name == "avx2.vinserti128" || // Added in 3.7
339  Name.startswith("avx512.mask.insert") || // Added in 4.0
340  Name.startswith("avx.vextractf128.") || // Added in 3.7
341  Name == "avx2.vextracti128" || // Added in 3.7
342  Name.startswith("avx512.mask.vextract") || // Added in 4.0
343  Name.startswith("sse4a.movnt.") || // Added in 3.9
344  Name.startswith("avx.movnt.") || // Added in 3.2
345  Name.startswith("avx512.storent.") || // Added in 3.9
346  Name == "sse41.movntdqa" || // Added in 5.0
347  Name == "avx2.movntdqa" || // Added in 5.0
348  Name == "avx512.movntdqa" || // Added in 5.0
349  Name == "sse2.storel.dq" || // Added in 3.9
350  Name.startswith("sse.storeu.") || // Added in 3.9
351  Name.startswith("sse2.storeu.") || // Added in 3.9
352  Name.startswith("avx.storeu.") || // Added in 3.9
353  Name.startswith("avx512.mask.storeu.") || // Added in 3.9
354  Name.startswith("avx512.mask.store.p") || // Added in 3.9
355  Name.startswith("avx512.mask.store.b.") || // Added in 3.9
356  Name.startswith("avx512.mask.store.w.") || // Added in 3.9
357  Name.startswith("avx512.mask.store.d.") || // Added in 3.9
358  Name.startswith("avx512.mask.store.q.") || // Added in 3.9
359  Name == "avx512.mask.store.ss" || // Added in 7.0
360  Name.startswith("avx512.mask.loadu.") || // Added in 3.9
361  Name.startswith("avx512.mask.load.") || // Added in 3.9
362  Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
363  Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
364  Name.startswith("avx512.mask.expand.b") || // Added in 9.0
365  Name.startswith("avx512.mask.expand.w") || // Added in 9.0
366  Name.startswith("avx512.mask.expand.d") || // Added in 9.0
367  Name.startswith("avx512.mask.expand.q") || // Added in 9.0
368  Name.startswith("avx512.mask.expand.p") || // Added in 9.0
369  Name.startswith("avx512.mask.compress.b") || // Added in 9.0
370  Name.startswith("avx512.mask.compress.w") || // Added in 9.0
371  Name.startswith("avx512.mask.compress.d") || // Added in 9.0
372  Name.startswith("avx512.mask.compress.q") || // Added in 9.0
373  Name.startswith("avx512.mask.compress.p") || // Added in 9.0
374  Name == "sse42.crc32.64.8" || // Added in 3.4
375  Name.startswith("avx.vbroadcast.s") || // Added in 3.5
376  Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
377  Name.startswith("avx512.mask.palignr.") || // Added in 3.9
378  Name.startswith("avx512.mask.valign.") || // Added in 4.0
379  Name.startswith("sse2.psll.dq") || // Added in 3.7
380  Name.startswith("sse2.psrl.dq") || // Added in 3.7
381  Name.startswith("avx2.psll.dq") || // Added in 3.7
382  Name.startswith("avx2.psrl.dq") || // Added in 3.7
383  Name.startswith("avx512.psll.dq") || // Added in 3.9
384  Name.startswith("avx512.psrl.dq") || // Added in 3.9
385  Name == "sse41.pblendw" || // Added in 3.7
386  Name.startswith("sse41.blendp") || // Added in 3.7
387  Name.startswith("avx.blend.p") || // Added in 3.7
388  Name == "avx2.pblendw" || // Added in 3.7
389  Name.startswith("avx2.pblendd.") || // Added in 3.7
390  Name.startswith("avx.vbroadcastf128") || // Added in 4.0
391  Name == "avx2.vbroadcasti128" || // Added in 3.7
392  Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
393  Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
394  Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
395  Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
396  Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
397  Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
398  Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
399  Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
400  Name == "xop.vpcmov" || // Added in 3.8
401  Name == "xop.vpcmov.256" || // Added in 5.0
402  Name.startswith("avx512.mask.move.s") || // Added in 4.0
403  Name.startswith("avx512.cvtmask2") || // Added in 5.0
404  Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
405  Name.startswith("xop.vprot") || // Added in 8.0
406  Name.startswith("avx512.prol") || // Added in 8.0
407  Name.startswith("avx512.pror") || // Added in 8.0
408  Name.startswith("avx512.mask.prorv.") || // Added in 8.0
409  Name.startswith("avx512.mask.pror.") || // Added in 8.0
410  Name.startswith("avx512.mask.prolv.") || // Added in 8.0
411  Name.startswith("avx512.mask.prol.") || // Added in 8.0
412  Name.startswith("avx512.ptestm") || // Added in 6.0
413  Name.startswith("avx512.ptestnm") || // Added in 6.0
414  Name.startswith("avx512.mask.pavg")) // Added in 6.0
415  return true;
416 
417  return false;
418 }
419 
// Decide whether the declaration F, identified by Name, is an outdated
// "x86."-prefixed intrinsic. Returns false when Name lacks that prefix or no
// case below applies. Returns true when an upgrade is needed; NewFn is then
// either the replacement declaration or, in the first branch, nullptr.
// NOTE(review): the first line of the signature (source line 420) is missing
// from this listing, so the function name and leading parameters cannot be
// confirmed here.
421  Function *&NewFn) {
422  // Only handle intrinsics that start with "x86.".
423  if (!Name.startswith("x86."))
424  return false;
425  // Remove "x86." prefix.
426  Name = Name.substr(4);
427 
  // NOTE(review): source line 428, which opens the block closed on line 431,
  // is absent from this listing; the condition guarding the two statements
  // below is therefore not visible.
429  NewFn = nullptr;
430  return true;
431  }
432 
433  if (Name == "rdtscp") { // Added in 8.0
434  // If this intrinsic has 0 operands, it's the new version.
435  if (F->getFunctionType()->getNumParams() == 0)
436  return false;
437 
438  rename(F);
439  NewFn = Intrinsic::getDeclaration(F->getParent(),
440  Intrinsic::x86_rdtscp);
441  return true;
442  }
443 
444  // SSE4.1 ptest functions may have an old signature.
445  if (Name.startswith("sse41.ptest")) { // Added in 3.2
  // "sse41.ptest" is 11 characters long, so substr(11) is the variant
  // suffix. Any other suffix falls through to the checks below.
446  if (Name.substr(11) == "c")
447  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
448  if (Name.substr(11) == "z")
449  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
450  if (Name.substr(11) == "nzc")
451  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
452  }
453  // Several blend and other instructions with masks used the wrong number of
454  // bits.
455  if (Name == "sse41.insertps") // Added in 3.6
456  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
457  NewFn);
458  if (Name == "sse41.dppd") // Added in 3.6
459  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
460  NewFn);
461  if (Name == "sse41.dpps") // Added in 3.6
462  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
463  NewFn);
464  if (Name == "sse41.mpsadbw") // Added in 3.6
465  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
466  NewFn);
467  if (Name == "avx.dp.ps.256") // Added in 3.6
468  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
469  NewFn);
470  if (Name == "avx2.mpsadbw") // Added in 3.6
471  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
472  NewFn);
  // Masked fp compares are delegated to the helper that tests F's return
  // type.
473  if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
474  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
475  NewFn);
476  if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
477  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
478  NewFn);
479  if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
480  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
481  NewFn);
482  if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
483  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
484  NewFn);
485  if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
486  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
487  NewFn);
488  if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
489  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
490  NewFn);
491 
492  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  // Only the two-argument form is treated as outdated.
493  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
494  rename(F);
495  NewFn = Intrinsic::getDeclaration(F->getParent(),
496  Intrinsic::x86_xop_vfrcz_ss);
497  return true;
498  }
499  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
500  rename(F);
501  NewFn = Intrinsic::getDeclaration(F->getParent(),
502  Intrinsic::x86_xop_vfrcz_sd);
503  return true;
504  }
505  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
506  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
507  auto Idx = F->getFunctionType()->getParamType(2);
508  if (Idx->isFPOrFPVectorTy()) {
509  rename(F);
  // Choose the replacement from the index operand's element width and
  // total width; any combination not matched explicitly selects the
  // ps.256 variant.
510  unsigned IdxSize = Idx->getPrimitiveSizeInBits();
511  unsigned EltSize = Idx->getScalarSizeInBits();
512  Intrinsic::ID Permil2ID;
513  if (EltSize == 64 && IdxSize == 128)
514  Permil2ID = Intrinsic::x86_xop_vpermil2pd;
515  else if (EltSize == 32 && IdxSize == 128)
516  Permil2ID = Intrinsic::x86_xop_vpermil2ps;
517  else if (EltSize == 64 && IdxSize == 256)
518  Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
519  else
520  Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
521  NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
522  return true;
523  }
524  }
525 
  // "x86.seh.recoverfp" maps onto Intrinsic::eh_recoverfp. Unlike the cases
  // above, F is not renamed here.
526  if (Name == "seh.recoverfp") {
527  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
528  return true;
529  }
530 
531  return false;
532 }
533 
535  assert(F && "Illegal to upgrade a non-existent Function.");
536 
537  // Quickly eliminate it, if it's not a candidate.
538  StringRef Name = F->getName();
539  if (Name.size() <= 8 || !Name.startswith("llvm."))
540  return false;
541  Name = Name.substr(5); // Strip off "llvm."
542 
543  switch (Name[0]) {
544  default: break;
545  case 'a': {
546  if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
547  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
548  F->arg_begin()->getType());
549  return true;
550  }
551  if (Name.startswith("aarch64.neon.frintn")) {
552  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
553  F->arg_begin()->getType());
554  return true;
555  }
556  if (Name.startswith("arm.neon.vclz")) {
557  Type* args[2] = {
558  F->arg_begin()->getType(),
559  Type::getInt1Ty(F->getContext())
560  };
561  // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
562  // the end of the name. Change name from llvm.arm.neon.vclz.* to
563  // llvm.ctlz.*
564  FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
565  NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
566  "llvm.ctlz." + Name.substr(14), F->getParent());
567  return true;
568  }
569  if (Name.startswith("arm.neon.vcnt")) {
570  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
571  F->arg_begin()->getType());
572  return true;
573  }
574  static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
575  if (vldRegex.match(Name)) {
576  auto fArgs = F->getFunctionType()->params();
577  SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
578  // Can't use Intrinsic::getDeclaration here as the return types might
579  // then only be structurally equal.
580  FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
581  NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
582  "llvm." + Name + ".p0i8", F->getParent());
583  return true;
584  }
585  static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
586  if (vstRegex.match(Name)) {
587  static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
588  Intrinsic::arm_neon_vst2,
589  Intrinsic::arm_neon_vst3,
590  Intrinsic::arm_neon_vst4};
591 
592  static const Intrinsic::ID StoreLaneInts[] = {
593  Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
594  Intrinsic::arm_neon_vst4lane
595  };
596 
597  auto fArgs = F->getFunctionType()->params();
598  Type *Tys[] = {fArgs[0], fArgs[1]};
599  if (Name.find("lane") == StringRef::npos)
600  NewFn = Intrinsic::getDeclaration(F->getParent(),
601  StoreInts[fArgs.size() - 3], Tys);
602  else
603  NewFn = Intrinsic::getDeclaration(F->getParent(),
604  StoreLaneInts[fArgs.size() - 5], Tys);
605  return true;
606  }
607  if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
608  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
609  return true;
610  }
611  if (Name.startswith("arm.neon.vqadds.")) {
612  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
613  F->arg_begin()->getType());
614  return true;
615  }
616  if (Name.startswith("arm.neon.vqaddu.")) {
617  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
618  F->arg_begin()->getType());
619  return true;
620  }
621  if (Name.startswith("arm.neon.vqsubs.")) {
622  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
623  F->arg_begin()->getType());
624  return true;
625  }
626  if (Name.startswith("arm.neon.vqsubu.")) {
627  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
628  F->arg_begin()->getType());
629  return true;
630  }
631  if (Name.startswith("aarch64.neon.addp")) {
632  if (F->arg_size() != 2)
633  break; // Invalid IR.
634  VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
635  if (Ty && Ty->getElementType()->isFloatingPointTy()) {
636  NewFn = Intrinsic::getDeclaration(F->getParent(),
637  Intrinsic::aarch64_neon_faddp, Ty);
638  return true;
639  }
640  }
641 
642  // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
643  // respectively
644  if ((Name.startswith("arm.neon.bfdot.") ||
645  Name.startswith("aarch64.neon.bfdot.")) &&
646  Name.endswith("i8")) {
647  Intrinsic::ID IID =
649  .Cases("arm.neon.bfdot.v2f32.v8i8",
650  "arm.neon.bfdot.v4f32.v16i8",
651  Intrinsic::arm_neon_bfdot)
652  .Cases("aarch64.neon.bfdot.v2f32.v8i8",
653  "aarch64.neon.bfdot.v4f32.v16i8",
654  Intrinsic::aarch64_neon_bfdot)
656  if (IID == Intrinsic::not_intrinsic)
657  break;
658 
659  size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
660  assert((OperandWidth == 64 || OperandWidth == 128) &&
661  "Unexpected operand width");
662  LLVMContext &Ctx = F->getParent()->getContext();
663  std::array<Type *, 2> Tys {{
664  F->getReturnType(),
665  FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
666  }};
667  NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
668  return true;
669  }
670 
671  // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
672  // and accept v8bf16 instead of v16i8
673  if ((Name.startswith("arm.neon.bfm") ||
674  Name.startswith("aarch64.neon.bfm")) &&
675  Name.endswith(".v4f32.v16i8")) {
676  Intrinsic::ID IID =
678  .Case("arm.neon.bfmmla.v4f32.v16i8",
679  Intrinsic::arm_neon_bfmmla)
680  .Case("arm.neon.bfmlalb.v4f32.v16i8",
681  Intrinsic::arm_neon_bfmlalb)
682  .Case("arm.neon.bfmlalt.v4f32.v16i8",
683  Intrinsic::arm_neon_bfmlalt)
684  .Case("aarch64.neon.bfmmla.v4f32.v16i8",
685  Intrinsic::aarch64_neon_bfmmla)
686  .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
687  Intrinsic::aarch64_neon_bfmlalb)
688  .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
689  Intrinsic::aarch64_neon_bfmlalt)
691  if (IID == Intrinsic::not_intrinsic)
692  break;
693 
694  std::array<Type *, 0> Tys;
695  NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
696  return true;
697  }
698  break;
699  }
700 
701  case 'c': {
702  if (Name.startswith("ctlz.") && F->arg_size() == 1) {
703  rename(F);
704  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
705  F->arg_begin()->getType());
706  return true;
707  }
708  if (Name.startswith("cttz.") && F->arg_size() == 1) {
709  rename(F);
710  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
711  F->arg_begin()->getType());
712  return true;
713  }
714  break;
715  }
716  case 'd': {
717  if (Name == "dbg.value" && F->arg_size() == 4) {
718  rename(F);
719  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
720  return true;
721  }
722  break;
723  }
724  case 'e': {
726  static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
727  if (R.match(Name, &Groups)) {
730  .Case("add", Intrinsic::vector_reduce_add)
731  .Case("mul", Intrinsic::vector_reduce_mul)
732  .Case("and", Intrinsic::vector_reduce_and)
733  .Case("or", Intrinsic::vector_reduce_or)
734  .Case("xor", Intrinsic::vector_reduce_xor)
735  .Case("smax", Intrinsic::vector_reduce_smax)
736  .Case("smin", Intrinsic::vector_reduce_smin)
737  .Case("umax", Intrinsic::vector_reduce_umax)
738  .Case("umin", Intrinsic::vector_reduce_umin)
739  .Case("fmax", Intrinsic::vector_reduce_fmax)
740  .Case("fmin", Intrinsic::vector_reduce_fmin)
742  if (ID != Intrinsic::not_intrinsic) {
743  rename(F);
744  auto Args = F->getFunctionType()->params();
745  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
746  return true;
747  }
748  }
749  static const Regex R2(
750  "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
751  Groups.clear();
752  if (R2.match(Name, &Groups)) {
754  if (Groups[1] == "fadd")
755  ID = Intrinsic::vector_reduce_fadd;
756  if (Groups[1] == "fmul")
757  ID = Intrinsic::vector_reduce_fmul;
758  if (ID != Intrinsic::not_intrinsic) {
759  rename(F);
760  auto Args = F->getFunctionType()->params();
761  Type *Tys[] = {Args[1]};
762  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
763  return true;
764  }
765  }
766  break;
767  }
768  case 'i':
769  case 'l': {
770  bool IsLifetimeStart = Name.startswith("lifetime.start");
771  if (IsLifetimeStart || Name.startswith("invariant.start")) {
772  Intrinsic::ID ID = IsLifetimeStart ?
773  Intrinsic::lifetime_start : Intrinsic::invariant_start;
774  auto Args = F->getFunctionType()->params();
775  Type* ObjectPtr[1] = {Args[1]};
776  if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
777  rename(F);
778  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
779  return true;
780  }
781  }
782 
783  bool IsLifetimeEnd = Name.startswith("lifetime.end");
784  if (IsLifetimeEnd || Name.startswith("invariant.end")) {
785  Intrinsic::ID ID = IsLifetimeEnd ?
786  Intrinsic::lifetime_end : Intrinsic::invariant_end;
787 
788  auto Args = F->getFunctionType()->params();
789  Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
790  if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
791  rename(F);
792  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
793  return true;
794  }
795  }
796  if (Name.startswith("invariant.group.barrier")) {
797  // Rename invariant.group.barrier to launder.invariant.group
798  auto Args = F->getFunctionType()->params();
799  Type* ObjectPtr[1] = {Args[0]};
800  rename(F);
801  NewFn = Intrinsic::getDeclaration(F->getParent(),
802  Intrinsic::launder_invariant_group, ObjectPtr);
803  return true;
804 
805  }
806 
807  break;
808  }
809  case 'm': {
810  if (Name.startswith("masked.load.")) {
811  Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
812  if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
813  rename(F);
814  NewFn = Intrinsic::getDeclaration(F->getParent(),
815  Intrinsic::masked_load,
816  Tys);
817  return true;
818  }
819  }
820  if (Name.startswith("masked.store.")) {
821  auto Args = F->getFunctionType()->params();
822  Type *Tys[] = { Args[0], Args[1] };
823  if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
824  rename(F);
825  NewFn = Intrinsic::getDeclaration(F->getParent(),
826  Intrinsic::masked_store,
827  Tys);
828  return true;
829  }
830  }
831  // Renaming gather/scatter intrinsics with no address space overloading
832  // to the new overload which includes an address space
833  if (Name.startswith("masked.gather.")) {
834  Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
835  if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
836  rename(F);
837  NewFn = Intrinsic::getDeclaration(F->getParent(),
838  Intrinsic::masked_gather, Tys);
839  return true;
840  }
841  }
842  if (Name.startswith("masked.scatter.")) {
843  auto Args = F->getFunctionType()->params();
844  Type *Tys[] = {Args[0], Args[1]};
845  if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
846  rename(F);
847  NewFn = Intrinsic::getDeclaration(F->getParent(),
848  Intrinsic::masked_scatter, Tys);
849  return true;
850  }
851  }
852  // Updating the memory intrinsics (memcpy/memmove/memset) that have an
853  // alignment parameter to embedding the alignment as an attribute of
854  // the pointer args.
855  if (Name.startswith("memcpy.") && F->arg_size() == 5) {
856  rename(F);
857  // Get the types of dest, src, and len
858  ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
859  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
860  ParamTypes);
861  return true;
862  }
863  if (Name.startswith("memmove.") && F->arg_size() == 5) {
864  rename(F);
865  // Get the types of dest, src, and len
866  ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
867  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
868  ParamTypes);
869  return true;
870  }
871  if (Name.startswith("memset.") && F->arg_size() == 5) {
872  rename(F);
873  // Get the types of dest, and len
874  const auto *FT = F->getFunctionType();
875  Type *ParamTypes[2] = {
876  FT->getParamType(0), // Dest
877  FT->getParamType(2) // len
878  };
879  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
880  ParamTypes);
881  return true;
882  }
883  break;
884  }
885  case 'n': {
886  if (Name.startswith("nvvm.")) {
887  Name = Name.substr(5);
888 
889  // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
891  .Cases("brev32", "brev64", Intrinsic::bitreverse)
892  .Case("clz.i", Intrinsic::ctlz)
893  .Case("popc.i", Intrinsic::ctpop)
895  if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
896  NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
897  {F->getReturnType()});
898  return true;
899  }
900 
901  // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
902  // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
903  //
904  // TODO: We could add lohi.i2d.
905  bool Expand = StringSwitch<bool>(Name)
906  .Cases("abs.i", "abs.ll", true)
907  .Cases("clz.ll", "popc.ll", "h2f", true)
908  .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
909  .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
910  .StartsWith("atomic.load.add.f32.p", true)
911  .StartsWith("atomic.load.add.f64.p", true)
912  .Default(false);
913  if (Expand) {
914  NewFn = nullptr;
915  return true;
916  }
917  }
918  break;
919  }
920  case 'o':
921  // We only need to change the name to match the mangling including the
922  // address space.
923  if (Name.startswith("objectsize.")) {
924  Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
925  if (F->arg_size() == 2 || F->arg_size() == 3 ||
926  F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
927  rename(F);
928  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
929  Tys);
930  return true;
931  }
932  }
933  break;
934 
935  case 'p':
936  if (Name == "prefetch") {
937  // Handle address space overloading.
938  Type *Tys[] = {F->arg_begin()->getType()};
939  if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) {
940  rename(F);
941  NewFn =
942  Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
943  return true;
944  }
945  } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
946  rename(F);
947  NewFn = Intrinsic::getDeclaration(F->getParent(),
948  Intrinsic::ptr_annotation,
949  F->arg_begin()->getType());
950  return true;
951  }
952  break;
953 
954  case 's':
955  if (Name == "stackprotectorcheck") {
956  NewFn = nullptr;
957  return true;
958  }
959  break;
960 
961  case 'v': {
962  if (Name == "var.annotation" && F->arg_size() == 4) {
963  rename(F);
964  NewFn = Intrinsic::getDeclaration(F->getParent(),
965  Intrinsic::var_annotation);
966  return true;
967  }
968  break;
969  }
970 
971  case 'x':
972  if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
973  return true;
974  }
975  // Remangle our intrinsic since we upgrade the mangling
977  if (Result != None) {
978  NewFn = Result.getValue();
979  return true;
980  }
981 
982  // This may not belong here. This function is effectively being overloaded
983  // to both detect an intrinsic which needs upgrading, and to provide the
984  // upgraded form of the intrinsic. We should perhaps have two separate
985  // functions for this.
986  return false;
987 }
988 
// NOTE(review): the declaration line (listing line 989) is absent from this
// listing; the body below uses the names F and NewFn that it introduces.
// Resets NewFn, delegates the upgrade decision to UpgradeIntrinsicFunction1,
// then re-applies the intrinsic attribute list to the resulting function.
// Returns the flag produced by UpgradeIntrinsicFunction1.
990  NewFn = nullptr;
991  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
992  assert(F != NewFn && "Intrinsic function upgraded to the same function");
993 
994  // Upgrade intrinsic attributes. This does not change the function.
// When a replacement declaration was produced, the attributes are refreshed
// on the replacement rather than on the original function.
995  if (NewFn)
996  F = NewFn;
// Only functions with a non-zero intrinsic ID get their attributes rewritten.
997  if (Intrinsic::ID id = F->getIntrinsicID())
998  F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
999  return Upgraded;
1000 }
1001 
// NOTE(review): the declaration line (listing line 1002) and listing line 1025
// (the tail of the ConstantStruct::get call) are absent from this listing.
// Returns nullptr unless GV is named llvm.global_ctors or llvm.global_dtors,
// has an initializer, and its value type is an array of two-field structs.
// Otherwise builds and returns a new, heap-allocated GlobalVariable whose
// element type is a three-field struct: the two original fields followed by
// an i8* field. The new variable copies GV's linkage and name.
1003  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1004  GV->getName() == "llvm.global_dtors")) ||
1005  !GV->hasInitializer())
1006  return nullptr;
// Only the old array-of-two-field-struct layout is rewritten.
1007  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1008  if (!ATy)
1009  return nullptr;
1010  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1011  if (!STy || STy->getNumElements() != 2)
1012  return nullptr;
1013 
1014  LLVMContext &C = GV->getContext();
1015  IRBuilder<> IRB(C);
// New element type: original field 0, original field 1, plus an i8* field.
1016  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1017  IRB.getInt8PtrTy());
1018  Constant *Init = GV->getInitializer();
1019  unsigned N = Init->getNumOperands();
1020  std::vector<Constant *> NewCtors(N);
// Rebuild every entry with the first two aggregate elements carried over.
// NOTE(review): the value used for the third field sits on the missing
// listing line 1025 — confirm against the original file.
1021  for (unsigned i = 0; i != N; ++i) {
1022  auto Ctor = cast<Constant>(Init->getOperand(i));
1023  NewCtors[i] = ConstantStruct::get(
1024  EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
1026  }
1027  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1028 
// The returned variable is created with `new`; this block does not insert it
// into a module or release it.
1029  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1030  NewInit, GV->getName());
1031 }
1032 
1033 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1034 // to byte shuffles.
// NOTE(review): the first declaration line (listing line 1035) is absent from
// this listing; the body uses Builder, Op and Shift.
// Op is viewed as a byte vector, shuffled against an all-zero vector
// independently inside each 16-byte lane, and cast back to Op's type. A Shift
// of 16 or more yields the all-zero vector.
1036  Value *Op, unsigned Shift) {
1037  auto *ResultTy = cast<FixedVectorType>(Op->getType());
// NumElts is the byte count: element count times 8 bytes per element.
1038  unsigned NumElts = ResultTy->getNumElements() * 8;
1039 
1040  // Bitcast from a 64-bit element type to a byte element type.
1041  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1042  Op = Builder.CreateBitCast(Op, VecTy, "cast");
1043 
1044  // We'll be shuffling in zeroes.
1045  Value *Res = Constant::getNullValue(VecTy);
1046 
1047  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1048  // we'll just return the zero vector.
1049  if (Shift < 16) {
// assumes NumElts <= 64 since Idxs has 64 slots — TODO confirm against callers
1050  int Idxs[64];
1051  // 256/512-bit version is split into 2/4 16-byte lanes.
// The shuffle below takes (zero vector, Op): indices >= NumElts pick bytes
// of Op, while the adjusted indices for i < Shift land in the zero vector.
1052  for (unsigned l = 0; l != NumElts; l += 16)
1053  for (unsigned i = 0; i != 16; ++i) {
1054  unsigned Idx = NumElts + i - Shift;
1055  if (Idx < NumElts)
1056  Idx -= NumElts - 16; // end of lane, switch operand.
1057  Idxs[l + i] = Idx + l;
1058  }
1059 
1060  Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
1061  }
1062 
1063  // Bitcast back to a 64-bit element type.
1064  return Builder.CreateBitCast(Res, ResultTy, "cast");
1065 }
1066 
1067 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1068 // to byte shuffles.
// NOTE(review): the first declaration line (listing line 1069) is absent from
// this listing; the body uses Builder, Op and Shift.
// Op is viewed as a byte vector, shuffled against an all-zero vector
// independently inside each 16-byte lane, and cast back to Op's type. A Shift
// of 16 or more yields the all-zero vector.
1070  unsigned Shift) {
1071  auto *ResultTy = cast<FixedVectorType>(Op->getType());
// NumElts is the byte count: element count times 8 bytes per element.
1072  unsigned NumElts = ResultTy->getNumElements() * 8;
1073 
1074  // Bitcast from a 64-bit element type to a byte element type.
1075  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1076  Op = Builder.CreateBitCast(Op, VecTy, "cast");
1077 
1078  // We'll be shuffling in zeroes.
1079  Value *Res = Constant::getNullValue(VecTy);
1080 
1081  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1082  // we'll just return the zero vector.
1083  if (Shift < 16) {
// assumes NumElts <= 64 since Idxs has 64 slots — TODO confirm against callers
1084  int Idxs[64];
1085  // 256/512-bit version is split into 2/4 16-byte lanes.
// The shuffle below takes (Op, zero vector): in-lane indices pick bytes of
// Op, and indices that run past the lane are redirected into the zero vector.
1086  for (unsigned l = 0; l != NumElts; l += 16)
1087  for (unsigned i = 0; i != 16; ++i) {
1088  unsigned Idx = i + Shift;
1089  if (Idx >= 16)
1090  Idx += NumElts - 16; // end of lane, switch operand.
1091  Idxs[l + i] = Idx + l;
1092  }
1093 
1094  Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
1095  }
1096 
1097  // Bitcast back to a 64-bit element type.
1098  return Builder.CreateBitCast(Res, ResultTy, "cast");
1099 }
1100 
// NOTE(review): the first declaration line (listing line 1101) and the start
// of the MaskTy definition (listing line 1104) are absent from this listing.
// Bitcasts the integer Mask to MaskTy (presumably a vector of i1 with one lane
// per bit of the mask's integer type — confirm against the missing line) and,
// when NumElts is 4 or fewer, keeps only the low NumElts lanes.
// Returns the resulting mask value.
1102  unsigned NumElts) {
1103  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1105  Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1106  Mask = Builder.CreateBitCast(Mask, MaskTy);
1107 
1108  // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1109  // i8 and we need to extract down to the right number of elements.
1110  if (NumElts <= 4) {
1111  int Indices[4];
// Identity indices 0..NumElts-1 select the low lanes of the widened mask.
1112  for (unsigned i = 0; i != NumElts; ++i)
1113  Indices[i] = i;
1114  Mask = Builder.CreateShuffleVector(
1115  Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
1116  }
1117 
1118  return Mask;
1119 }
1120 
// NOTE(review): the first declaration line (listing line 1121) and listing
// line 1128 (the start of the statement that ends on 1129) are absent from
// this listing.
// Returns Op0 directly when Mask is a constant with all bits set; otherwise
// emits a select between Op0 and Op1 controlled by Mask.
1122  Value *Op0, Value *Op1) {
1123  // If the mask is all ones just emit the first operation.
1124  if (const auto *C = dyn_cast<Constant>(Mask))
1125  if (C->isAllOnesValue())
1126  return Op0;
1127 
// presumably Mask is converted here to a per-element mask sized by Op0's
// element count (the call itself is on the missing line) — TODO confirm
1129  cast<FixedVectorType>(Op0->getType())->getNumElements());
1130  return Builder.CreateSelect(Mask, Op0, Op1);
1131 }
1132 
// NOTE(review): the first declaration line (listing line 1133) is absent from
// this listing.
// Returns Op0 directly when Mask is a constant with all bits set; otherwise
// selects between Op0 and Op1 using only bit 0 of the integer Mask.
1134  Value *Op0, Value *Op1) {
1135  // If the mask is all ones just emit the first operation.
1136  if (const auto *C = dyn_cast<Constant>(Mask))
1137  if (C->isAllOnesValue())
1138  return Op0;
1139 
// View the integer mask as a vector of i1 (one lane per bit) and take lane 0
// as the select condition.
1140  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1141  Mask->getType()->getIntegerBitWidth());
1142  Mask = Builder.CreateBitCast(Mask, MaskTy);
1143  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1144  return Builder.CreateSelect(Mask, Op0, Op1);
1145 }
1146 
1147 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1148 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1149 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// NOTE(review): the first declaration line (listing line 1150) is absent from
// this listing; the body uses Builder, Op0, Op1, Shift, Passthru, Mask and
// IsVALIGN.
// Shift must be a ConstantInt (it is cast unconditionally). The result is a
// shuffle of Op1 and Op0 combined with Passthru under Mask via EmitX86Select,
// or an all-zero value of Op0's type when the shift amount is 32 or more.
1151  Value *Op1, Value *Shift,
1152  Value *Passthru, Value *Mask,
1153  bool IsVALIGN) {
1154  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1155 
1156  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1157  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1158  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1159  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1160 
1161  // Mask the immediate for VALIGN.
1162  if (IsVALIGN)
1163  ShiftVal &= (NumElts - 1);
1164 
1165  // If palignr is shifting the pair of vectors more than the size of two
1166  // lanes, emit zero.
1167  if (ShiftVal >= 32)
1168  return llvm::Constant::getNullValue(Op0->getType());
1169 
1170  // If palignr is shifting the pair of input vectors more than one lane,
1171  // but less than two lanes, convert to shifting in zeroes.
1172  if (ShiftVal > 16) {
1173  ShiftVal -= 16;
1174  Op1 = Op0;
1175  Op0 = llvm::Constant::getNullValue(Op0->getType());
1176  }
1177 
// Index buffer; only the first NumElts entries are passed to the shuffle.
1178  int Indices[64];
1179  // 256-bit palignr operates on 128-bit lanes so we need to handle that
1180  for (unsigned l = 0; l < NumElts; l += 16) {
1181  for (unsigned i = 0; i != 16; ++i) {
1182  unsigned Idx = ShiftVal + i;
1183  if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1184  Idx += NumElts - 16; // End of lane, switch operand.
1185  Indices[l + i] = Idx + l;
1186  }
1187  }
1188 
// Op1 is the first shuffle operand and Op0 the second.
1189  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
1190  makeArrayRef(Indices, NumElts),
1191  "palignr");
1192 
1193  return EmitX86Select(Builder, Mask, Align, Passthru);
1194 }
1195 
// NOTE(review): the first declaration line (listing line 1196) is absent from
// this listing; the body uses Builder, CI, ZeroMask and IndexForm.
// Chooses an x86_avx512_vpermi2var_* intrinsic from the call's result type
// (total width, element width, and float vs. integer for 32/64-bit elements),
// calls it with CI's first three arguments, and merges the result with a
// pass-through value under the mask in argument 3.
1197  bool ZeroMask, bool IndexForm) {
1198  Type *Ty = CI.getType();
1199  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1200  unsigned EltWidth = Ty->getScalarSizeInBits();
1201  bool IsFloat = Ty->isFPOrFPVectorTy();
1202  Intrinsic::ID IID;
// 32/64-bit elements distinguish float from integer variants; 16/8-bit
// elements are selected on width alone.
1203  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1204  IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1205  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1206  IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1207  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1208  IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1209  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1210  IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1211  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1212  IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1213  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1214  IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1215  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1216  IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1217  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1218  IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1219  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1220  IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1221  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1222  IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1223  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1224  IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1225  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1226  IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1227  else if (VecWidth == 128 && EltWidth == 16)
1228  IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1229  else if (VecWidth == 256 && EltWidth == 16)
1230  IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1231  else if (VecWidth == 512 && EltWidth == 16)
1232  IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1233  else if (VecWidth == 128 && EltWidth == 8)
1234  IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1235  else if (VecWidth == 256 && EltWidth == 8)
1236  IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1237  else if (VecWidth == 512 && EltWidth == 8)
1238  IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1239  else
1240  llvm_unreachable("Unexpected intrinsic");
1241 
1242  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1243  CI.getArgOperand(2) };
1244 
1245  // If this isn't index form we need to swap operand 0 and 1.
1246  if (!IndexForm)
1247  std::swap(Args[0], Args[1]);
1248 
1249  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1250  Args);
// Pass-through is zero for the zero-masking form; otherwise it is the call's
// original argument 1 (read from CI, so unaffected by the swap above),
// bitcast to the result type.
1251  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1252  : Builder.CreateBitCast(CI.getArgOperand(1),
1253  Ty);
1254  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1255 }
1256 
// NOTE(review): the first declaration line (listing line 1257) is absent from
// this listing; the body uses Builder, CI and IID.
// Calls the intrinsic IID, overloaded on CI's result type, with CI's first
// two operands. For four-argument (masked) calls the result is merged with
// operand 2 under the mask in operand 3. Returns the resulting value.
1258  Intrinsic::ID IID) {
1259  Type *Ty = CI.getType();
1260  Value *Op0 = CI.getOperand(0);
1261  Value *Op1 = CI.getOperand(1);
1262  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1263  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1264 
1265  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1266  Value *VecSrc = CI.getOperand(2);
1267  Value *Mask = CI.getOperand(3);
1268  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1269  }
1270  return Res;
1271 }
1272 
// NOTE(review): the first declaration line (listing line 1273) is absent from
// this listing; the body uses Builder, CI and IsRotateRight.
// Rewrites a rotate as a funnel shift whose two data inputs are the same
// value: fshr when IsRotateRight is set, fshl otherwise. For four-argument
// (masked) calls the result is merged with operand 2 under the mask in
// operand 3.
1274  bool IsRotateRight) {
1275  Type *Ty = CI.getType();
1276  Value *Src = CI.getArgOperand(0);
1277  Value *Amt = CI.getArgOperand(1);
1278 
1279  // Amount may be scalar immediate, in which case create a splat vector.
1280  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1281  // we only care about the lowest log2 bits anyway.
1282  if (Amt->getType() != Ty) {
1283  unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// Unsigned cast (isSigned = false) to the element type before splatting.
1284  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1285  Amt = Builder.CreateVectorSplat(NumElts, Amt);
1286  }
1287 
1288  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1289  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1290  Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1291 
1292  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1293  Value *VecSrc = CI.getOperand(2);
1294  Value *Mask = CI.getOperand(3);
1295  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1296  }
1297  return Res;
1298 }
1299 
// Lowers an XOP vpcom/vpcomu call to an integer compare of its first two
// arguments, sign-extended to the call's result type.
// Imm selects the predicate: 0 lt, 1 le, 2 gt, 3 ge (signed or unsigned per
// IsSigned), 4 eq, 5 ne; 6 yields an all-zero value and 7 an all-ones value
// without emitting a compare. Any other Imm is treated as unreachable.
1300 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1301  bool IsSigned) {
1302  Type *Ty = CI.getType();
1303  Value *LHS = CI.getArgOperand(0);
1304  Value *RHS = CI.getArgOperand(1);
1305 
1306  CmpInst::Predicate Pred;
1307  switch (Imm) {
1308  case 0x0:
1309  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1310  break;
1311  case 0x1:
1312  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1313  break;
1314  case 0x2:
1315  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1316  break;
1317  case 0x3:
1318  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1319  break;
1320  case 0x4:
1321  Pred = ICmpInst::ICMP_EQ;
1322  break;
1323  case 0x5:
1324  Pred = ICmpInst::ICMP_NE;
1325  break;
1326  case 0x6:
1327  return Constant::getNullValue(Ty); // FALSE
1328  case 0x7:
1329  return Constant::getAllOnesValue(Ty); // TRUE
1330  default:
1331  llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1332  }
1333 
// Sign-extending the i1 compare result gives all-ones for true lanes and
// zero for false lanes.
1334  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1335  Value *Ext = Builder.CreateSExt(Cmp, Ty);
1336  return Ext;
1337 }
1338 
// NOTE(review): the first declaration line (listing line 1339) is absent from
// this listing; the body uses Builder, CI, IsShiftRight and ZeroMask.
// Rewrites a concatenating shift as a funnel shift: fshr when IsShiftRight is
// set (with the two data operands swapped), fshl otherwise. Calls with four
// or more arguments are treated as masked and the result is merged with a
// pass-through value under the mask in the last argument.
1340  bool IsShiftRight, bool ZeroMask) {
1341  Type *Ty = CI.getType();
1342  Value *Op0 = CI.getArgOperand(0);
1343  Value *Op1 = CI.getArgOperand(1);
1344  Value *Amt = CI.getArgOperand(2);
1345 
1346  if (IsShiftRight)
1347  std::swap(Op0, Op1);
1348 
1349  // Amount may be scalar immediate, in which case create a splat vector.
1350  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1351  // we only care about the lowest log2 bits anyway.
1352  if (Amt->getType() != Ty) {
1353  unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1354  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1355  Amt = Builder.CreateVectorSplat(NumElts, Amt);
1356  }
1357 
1358  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1359  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1360  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1361 
1362  unsigned NumArgs = CI.getNumArgOperands();
1363  if (NumArgs >= 4) { // For masked intrinsics.
// Pass-through: explicit argument 3 for five-argument calls; otherwise zero
// when ZeroMask is set, else the call's original argument 0.
1364  Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1365  ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1366  CI.getArgOperand(0);
1367  Value *Mask = CI.getOperand(NumArgs - 1);
1368  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1369  }
1370  return Res;
1371 }
1372 
// NOTE(review): the first declaration line (listing line 1373) is absent from
// this listing; the body uses Builder, Ptr, Data, Mask and Aligned.
// Stores Data through Ptr. Emits a plain aligned store when Mask is a
// constant with all bits set, and a masked store otherwise.
1374  Value *Ptr, Value *Data, Value *Mask,
1375  bool Aligned) {
1376  // Cast the pointer to the right type.
1377  Ptr = Builder.CreateBitCast(Ptr,
1378  llvm::PointerType::getUnqual(Data->getType()));
// Aligned accesses use the full byte size of Data's type as the alignment;
// unaligned accesses use alignment 1.
1379  const Align Alignment =
1380  Aligned
1381  ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
1382  : Align(1);
1383 
1384  // If the mask is all ones just emit a regular store.
1385  if (const auto *C = dyn_cast<Constant>(Mask))
1386  if (C->isAllOnesValue())
1387  return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1388 
1389  // Convert the mask from an integer type to a vector of i1.
1390  unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1391  Mask = getX86MaskVec(Builder, Mask, NumElts);
1392  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1393 }
1394 
// NOTE(review): the first declaration line (listing line 1395) is absent from
// this listing; the body uses Builder, Ptr, Passthru, Mask and Aligned.
// Loads a value of Passthru's type through Ptr. Emits a plain aligned load
// when Mask is a constant with all bits set, and a masked load with Passthru
// as the pass-through value otherwise.
1396  Value *Ptr, Value *Passthru, Value *Mask,
1397  bool Aligned) {
1398  Type *ValTy = Passthru->getType();
1399  // Cast the pointer to the right type.
1400  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
// Aligned accesses use the full byte size of the loaded type as the
// alignment; unaligned accesses use alignment 1.
1401  const Align Alignment =
1402  Aligned
1403  ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
1404  8)
1405  : Align(1);
1406 
1407  // If the mask is all ones just emit a regular load.
1408  if (const auto *C = dyn_cast<Constant>(Mask))
1409  if (C->isAllOnesValue())
1410  return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1411 
1412  // Convert the mask from an integer type to a vector of i1.
1413  unsigned NumElts =
1414  cast<FixedVectorType>(Passthru->getType())->getNumElements();
1415  Mask = getX86MaskVec(Builder, Mask, NumElts);
1416  return Builder.CreateMaskedLoad(Ptr, Alignment, Mask, Passthru);
1417 }
1418 
// NOTE(review): the declaration line (listing line 1419) and the definition
// of F (listing line 1422) are absent from this listing.
// Calls F with CI's first argument and a constant false flag. For
// three-argument (masked) calls the result is merged with argument 1 under
// the mask in argument 2. Returns the resulting value.
1420  Type *Ty = CI.getType();
1421  Value *Op0 = CI.getArgOperand(0);
1423  Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1424  if (CI.getNumArgOperands() == 3)
1425  Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1426  return Res;
1427 }
1428 
// Expands a PMULDQ/PMULUDQ-style call into plain IR: both inputs are bitcast
// to the call's result type, narrowed to their low 32 bits (sign-extended
// when IsSigned, zero-extended otherwise) and multiplied. For four-argument
// (masked) calls the product is merged with argument 2 under the mask in
// argument 3. Returns the resulting value.
1429 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1430  Type *Ty = CI.getType();
1431 
1432  // Arguments have a vXi32 type so cast to vXi64.
1433  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1434  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1435 
1436  if (IsSigned) {
1437  // Shift left then arithmetic shift right.
// Shifting by 32 both ways sign-extends the low 32 bits of each lane.
1438  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1439  LHS = Builder.CreateShl(LHS, ShiftAmt);
1440  LHS = Builder.CreateAShr(LHS, ShiftAmt);
1441  RHS = Builder.CreateShl(RHS, ShiftAmt);
1442  RHS = Builder.CreateAShr(RHS, ShiftAmt);
1443  } else {
1444  // Clear the upper bits.
1445  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1446  LHS = Builder.CreateAnd(LHS, Mask);
1447  RHS = Builder.CreateAnd(RHS, Mask);
1448  }
1449 
1450  Value *Res = Builder.CreateMul(LHS, RHS);
1451 
1452  if (CI.getNumArgOperands() == 4)
1453  Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1454 
1455  return Res;
1456 }
1457 
1458 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// NOTE(review): the first declaration line (listing line 1459) and listing
// line 1475 (the second shuffle operand) are absent from this listing.
// Mask may be null; a null mask or a constant all-ones mask skips the AND.
// Returns Vec bitcast to an integer of max(NumElts, 8) bits.
1460  Value *Mask) {
1461  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1462  if (Mask) {
1463  const auto *C = dyn_cast<Constant>(Mask);
1464  if (!C || !C->isAllOnesValue())
1465  Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1466  }
1467 
// Vectors with fewer than 8 lanes are widened to 8 lanes by a shuffle.
1468  if (NumElts < 8) {
1469  int Indices[8];
// The first NumElts lanes keep Vec's own elements.
1470  for (unsigned i = 0; i != NumElts; ++i)
1471  Indices[i] = i;
// The remaining lanes use indices >= NumElts, i.e. they come from the second
// shuffle operand (presumably a zero vector — it is on the missing line).
1472  for (unsigned i = NumElts; i != 8; ++i)
1473  Indices[i] = NumElts + i % NumElts;
1474  Vec = Builder.CreateShuffleVector(Vec,
1476  Indices);
1477  }
1478  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1479 }
1480 
// NOTE(review): the first declaration line (listing line 1481) and listing
// line 1491 (the start of the CC == 7 assignment) are absent from this
// listing; the body uses Builder, CI, CC and Signed.
// Builds a per-element i1 compare result for condition code CC, then applies
// the call's last argument as a mask via ApplyX86MaskOn1BitsVec.
1482  unsigned CC, bool Signed) {
1483  Value *Op0 = CI.getArgOperand(0);
1484  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1485 
1486  Value *Cmp;
// CC 3 produces a constant all-false vector and CC 7 a constant built on the
// missing line (presumably all-true); neither emits a compare.
1487  if (CC == 3) {
1488  Cmp = Constant::getNullValue(
1489  FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1490  } else if (CC == 7) {
1492  FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1493  } else {
1494  ICmpInst::Predicate Pred;
// CC 0 eq, 1 lt, 2 le, 4 ne, 5 ge, 6 gt; ordered predicates are signed or
// unsigned per Signed.
1495  switch (CC) {
1496  default: llvm_unreachable("Unknown condition code");
1497  case 0: Pred = ICmpInst::ICMP_EQ; break;
1498  case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1499  case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1500  case 4: Pred = ICmpInst::ICMP_NE; break;
1501  case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1502  case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1503  }
1504  Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1505  }
1506 
// The mask is always the call's final argument.
1507  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1508 
1509  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1510 }
1511 
1512 // Replace a masked intrinsic with an older unmasked intrinsic.
// NOTE(review): the first declaration line (listing line 1513) is absent from
// this listing; the body uses Builder, CI and IID.
// Calls the intrinsic IID with CI's first two arguments, then merges the
// result with argument 2 under the mask in argument 3.
1514  Intrinsic::ID IID) {
1515  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1516  Value *Rep = Builder.CreateCall(Intrin,
1517  { CI.getArgOperand(0), CI.getArgOperand(1) });
1518  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1519 }
1520 
// NOTE(review): the declaration line (listing line 1521) is absent from this
// listing; the body uses Builder and CI.
// Returns argument 0 with element 0 replaced: by element 0 of argument 1 when
// bit 0 of the mask (argument 3) is set, otherwise by element 0 of argument 2.
1522  Value* A = CI.getArgOperand(0);
1523  Value* B = CI.getArgOperand(1);
1524  Value* Src = CI.getArgOperand(2);
1525  Value* Mask = CI.getArgOperand(3);
1526 
// Only the lowest mask bit is tested; the constant is built as an 8-bit APInt.
1527  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1528  Value* Cmp = Builder.CreateIsNotNull(AndNode);
1529  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1530  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1531  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1532  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1533 }
1534 
1535 
// NOTE(review): the declaration line (listing line 1536) is absent from this
// listing; the body uses Builder and CI.
// Converts the mask in argument 0 to a per-element mask with one lane per
// element of the call's result type, then sign-extends it to that type.
1537  Value* Op = CI.getArgOperand(0);
1538  Type* ReturnOp = CI.getType();
1539  unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1540  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1541  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1542 }
1543 
1544 // Replace intrinsic with unmasked version and a select.
// NOTE(review): the first line of this definition's signature and the line
// that begins the declaration of Args are not visible in this listing.
//
// Strips the leading "avx512.mask." from Name, then chooses the intrinsic ID
// (IID) of the unmasked operation from the remaining name and, where needed,
// from the bit width of the call's result type and of its elements. On a
// match the replacement value is stored in Rep and true is returned. If the
// name matches none of the handled prefixes, false is returned and Rep is not
// written. A handled prefix combined with an unhandled width reaches
// llvm_unreachable.
1546  CallInst &CI, Value *&Rep) {
1547  Name = Name.substr(12); // Remove avx512.mask.
1548
// Total bit width of the result type and bit width of a single element.
1549  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1550  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1551  Intrinsic::ID IID;
1552  if (Name.startswith("max.p")) {
1553  if (VecWidth == 128 && EltWidth == 32)
1554  IID = Intrinsic::x86_sse_max_ps;
1555  else if (VecWidth == 128 && EltWidth == 64)
1556  IID = Intrinsic::x86_sse2_max_pd;
1557  else if (VecWidth == 256 && EltWidth == 32)
1558  IID = Intrinsic::x86_avx_max_ps_256;
1559  else if (VecWidth == 256 && EltWidth == 64)
1560  IID = Intrinsic::x86_avx_max_pd_256;
1561  else
1562  llvm_unreachable("Unexpected intrinsic");
1563  } else if (Name.startswith("min.p")) {
1564  if (VecWidth == 128 && EltWidth == 32)
1565  IID = Intrinsic::x86_sse_min_ps;
1566  else if (VecWidth == 128 && EltWidth == 64)
1567  IID = Intrinsic::x86_sse2_min_pd;
1568  else if (VecWidth == 256 && EltWidth == 32)
1569  IID = Intrinsic::x86_avx_min_ps_256;
1570  else if (VecWidth == 256 && EltWidth == 64)
1571  IID = Intrinsic::x86_avx_min_pd_256;
1572  else
1573  llvm_unreachable("Unexpected intrinsic");
1574  } else if (Name.startswith("pshuf.b.")) {
1575  if (VecWidth == 128)
1576  IID = Intrinsic::x86_ssse3_pshuf_b_128;
1577  else if (VecWidth == 256)
1578  IID = Intrinsic::x86_avx2_pshuf_b;
1579  else if (VecWidth == 512)
1580  IID = Intrinsic::x86_avx512_pshuf_b_512;
1581  else
1582  llvm_unreachable("Unexpected intrinsic");
1583  } else if (Name.startswith("pmul.hr.sw.")) {
1584  if (VecWidth == 128)
1585  IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1586  else if (VecWidth == 256)
1587  IID = Intrinsic::x86_avx2_pmul_hr_sw;
1588  else if (VecWidth == 512)
1589  IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1590  else
1591  llvm_unreachable("Unexpected intrinsic");
1592  } else if (Name.startswith("pmulh.w.")) {
1593  if (VecWidth == 128)
1594  IID = Intrinsic::x86_sse2_pmulh_w;
1595  else if (VecWidth == 256)
1596  IID = Intrinsic::x86_avx2_pmulh_w;
1597  else if (VecWidth == 512)
1598  IID = Intrinsic::x86_avx512_pmulh_w_512;
1599  else
1600  llvm_unreachable("Unexpected intrinsic");
1601  } else if (Name.startswith("pmulhu.w.")) {
1602  if (VecWidth == 128)
1603  IID = Intrinsic::x86_sse2_pmulhu_w;
1604  else if (VecWidth == 256)
1605  IID = Intrinsic::x86_avx2_pmulhu_w;
1606  else if (VecWidth == 512)
1607  IID = Intrinsic::x86_avx512_pmulhu_w_512;
1608  else
1609  llvm_unreachable("Unexpected intrinsic");
1610  } else if (Name.startswith("pmaddw.d.")) {
1611  if (VecWidth == 128)
1612  IID = Intrinsic::x86_sse2_pmadd_wd;
1613  else if (VecWidth == 256)
1614  IID = Intrinsic::x86_avx2_pmadd_wd;
1615  else if (VecWidth == 512)
1616  IID = Intrinsic::x86_avx512_pmaddw_d_512;
1617  else
1618  llvm_unreachable("Unexpected intrinsic");
1619  } else if (Name.startswith("pmaddubs.w.")) {
1620  if (VecWidth == 128)
1621  IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1622  else if (VecWidth == 256)
1623  IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1624  else if (VecWidth == 512)
1625  IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1626  else
1627  llvm_unreachable("Unexpected intrinsic");
1628  } else if (Name.startswith("packsswb.")) {
1629  if (VecWidth == 128)
1630  IID = Intrinsic::x86_sse2_packsswb_128;
1631  else if (VecWidth == 256)
1632  IID = Intrinsic::x86_avx2_packsswb;
1633  else if (VecWidth == 512)
1634  IID = Intrinsic::x86_avx512_packsswb_512;
1635  else
1636  llvm_unreachable("Unexpected intrinsic");
1637  } else if (Name.startswith("packssdw.")) {
1638  if (VecWidth == 128)
1639  IID = Intrinsic::x86_sse2_packssdw_128;
1640  else if (VecWidth == 256)
1641  IID = Intrinsic::x86_avx2_packssdw;
1642  else if (VecWidth == 512)
1643  IID = Intrinsic::x86_avx512_packssdw_512;
1644  else
1645  llvm_unreachable("Unexpected intrinsic");
1646  } else if (Name.startswith("packuswb.")) {
1647  if (VecWidth == 128)
1648  IID = Intrinsic::x86_sse2_packuswb_128;
1649  else if (VecWidth == 256)
1650  IID = Intrinsic::x86_avx2_packuswb;
1651  else if (VecWidth == 512)
1652  IID = Intrinsic::x86_avx512_packuswb_512;
1653  else
1654  llvm_unreachable("Unexpected intrinsic");
1655  } else if (Name.startswith("packusdw.")) {
1656  if (VecWidth == 128)
1657  IID = Intrinsic::x86_sse41_packusdw;
1658  else if (VecWidth == 256)
1659  IID = Intrinsic::x86_avx2_packusdw;
1660  else if (VecWidth == 512)
1661  IID = Intrinsic::x86_avx512_packusdw_512;
1662  else
1663  llvm_unreachable("Unexpected intrinsic");
1664  } else if (Name.startswith("vpermilvar.")) {
1665  if (VecWidth == 128 && EltWidth == 32)
1666  IID = Intrinsic::x86_avx_vpermilvar_ps;
1667  else if (VecWidth == 128 && EltWidth == 64)
1668  IID = Intrinsic::x86_avx_vpermilvar_pd;
1669  else if (VecWidth == 256 && EltWidth == 32)
1670  IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1671  else if (VecWidth == 256 && EltWidth == 64)
1672  IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1673  else if (VecWidth == 512 && EltWidth == 32)
1674  IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1675  else if (VecWidth == 512 && EltWidth == 64)
1676  IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1677  else
1678  llvm_unreachable("Unexpected intrinsic");
// The conversions below are matched by exact name, so no width check is made.
1679  } else if (Name == "cvtpd2dq.256") {
1680  IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1681  } else if (Name == "cvtpd2ps.256") {
1682  IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1683  } else if (Name == "cvttpd2dq.256") {
1684  IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1685  } else if (Name == "cvttps2dq.128") {
1686  IID = Intrinsic::x86_sse2_cvttps2dq;
1687  } else if (Name == "cvttps2dq.256") {
1688  IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1689  } else if (Name.startswith("permvar.")) {
// For 32- and 64-bit elements the choice also depends on whether the result
// is a floating-point vector; 16- and 8-bit elements are selected by width
// alone.
1690  bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1691  if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1692  IID = Intrinsic::x86_avx2_permps;
1693  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1694  IID = Intrinsic::x86_avx2_permd;
1695  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1696  IID = Intrinsic::x86_avx512_permvar_df_256;
1697  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1698  IID = Intrinsic::x86_avx512_permvar_di_256;
1699  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1700  IID = Intrinsic::x86_avx512_permvar_sf_512;
1701  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1702  IID = Intrinsic::x86_avx512_permvar_si_512;
1703  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1704  IID = Intrinsic::x86_avx512_permvar_df_512;
1705  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1706  IID = Intrinsic::x86_avx512_permvar_di_512;
1707  else if (VecWidth == 128 && EltWidth == 16)
1708  IID = Intrinsic::x86_avx512_permvar_hi_128;
1709  else if (VecWidth == 256 && EltWidth == 16)
1710  IID = Intrinsic::x86_avx512_permvar_hi_256;
1711  else if (VecWidth == 512 && EltWidth == 16)
1712  IID = Intrinsic::x86_avx512_permvar_hi_512;
1713  else if (VecWidth == 128 && EltWidth == 8)
1714  IID = Intrinsic::x86_avx512_permvar_qi_128;
1715  else if (VecWidth == 256 && EltWidth == 8)
1716  IID = Intrinsic::x86_avx512_permvar_qi_256;
1717  else if (VecWidth == 512 && EltWidth == 8)
1718  IID = Intrinsic::x86_avx512_permvar_qi_512;
1719  else
1720  llvm_unreachable("Unexpected intrinsic");
1721  } else if (Name.startswith("dbpsadbw.")) {
1722  if (VecWidth == 128)
1723  IID = Intrinsic::x86_avx512_dbpsadbw_128;
1724  else if (VecWidth == 256)
1725  IID = Intrinsic::x86_avx512_dbpsadbw_256;
1726  else if (VecWidth == 512)
1727  IID = Intrinsic::x86_avx512_dbpsadbw_512;
1728  else
1729  llvm_unreachable("Unexpected intrinsic");
1730  } else if (Name.startswith("pmultishift.qb.")) {
1731  if (VecWidth == 128)
1732  IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1733  else if (VecWidth == 256)
1734  IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1735  else if (VecWidth == 512)
1736  IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1737  else
1738  llvm_unreachable("Unexpected intrinsic");
1739  } else if (Name.startswith("conflict.")) {
// Name[9] is the character immediately after the 9-character prefix
// "conflict."; 'd' and 'q' select between the _d_ and _q_ intrinsics.
1740  if (Name[9] == 'd' && VecWidth == 128)
1741  IID = Intrinsic::x86_avx512_conflict_d_128;
1742  else if (Name[9] == 'd' && VecWidth == 256)
1743  IID = Intrinsic::x86_avx512_conflict_d_256;
1744  else if (Name[9] == 'd' && VecWidth == 512)
1745  IID = Intrinsic::x86_avx512_conflict_d_512;
1746  else if (Name[9] == 'q' && VecWidth == 128)
1747  IID = Intrinsic::x86_avx512_conflict_q_128;
1748  else if (Name[9] == 'q' && VecWidth == 256)
1749  IID = Intrinsic::x86_avx512_conflict_q_256;
1750  else if (Name[9] == 'q' && VecWidth == 512)
1751  IID = Intrinsic::x86_avx512_conflict_q_512;
1752  else
1753  llvm_unreachable("Unexpected intrinsic");
1754  } else if (Name.startswith("pavg.")) {
// Name[5] is the character immediately after the 5-character prefix "pavg.";
// 'b' and 'w' select between the _b and _w intrinsics.
1755  if (Name[5] == 'b' && VecWidth == 128)
1756  IID = Intrinsic::x86_sse2_pavg_b;
1757  else if (Name[5] == 'b' && VecWidth == 256)
1758  IID = Intrinsic::x86_avx2_pavg_b;
1759  else if (Name[5] == 'b' && VecWidth == 512)
1760  IID = Intrinsic::x86_avx512_pavg_b_512;
1761  else if (Name[5] == 'w' && VecWidth == 128)
1762  IID = Intrinsic::x86_sse2_pavg_w;
1763  else if (Name[5] == 'w' && VecWidth == 256)
1764  IID = Intrinsic::x86_avx2_pavg_w;
1765  else if (Name[5] == 'w' && VecWidth == 512)
1766  IID = Intrinsic::x86_avx512_pavg_w_512;
1767  else
1768  llvm_unreachable("Unexpected intrinsic");
1769  } else
// No handled prefix matched: report that nothing was upgraded.
1770  return false;
1771
// NOTE(review): the start of the Args declaration is not visible here;
// presumably Args is built from the call's full operand range -- confirm.
1773  CI.arg_operands().end());
// Drop the last two operands before calling the unmasked intrinsic.
1774  Args.pop_back();
1775  Args.pop_back();
1776  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1777  Args);
1778  unsigned NumArgs = CI.getNumArgOperands();
// Combine the new call's result with the original call's second-to-last
// operand under control of its last operand (presumably pass-through value
// and mask -- confirm against EmitX86Select).
1779  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1780  CI.getArgOperand(NumArgs - 2));
1781  return true;
1782 }
1783 
/// Upgrade the comment in an inline-asm string that represents an objc
/// retain/release marker.
///
/// The string is rewritten only when all of the following hold:
///   - it begins with "mov\tfp",
///   - it contains "objc_retainAutoreleaseReturnValue", and
///   - it contains "# marker".
/// In that case the '#' that introduces the first "# marker" is replaced in
/// place by ';'. Any other string is left untouched.
///
/// \param AsmStr the inline-asm string to update in place; it is dereferenced
///               unconditionally, so it must not be null.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // Anchored prefix test: rfind(..., 0) only examines position 0, whereas
  // find(...) == 0 would scan the whole string whenever the prefix is absent.
  if (AsmStr->rfind("mov\tfp", 0) != 0)
    return;

  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t Pos = AsmStr->find("# marker");
  if (Pos == std::string::npos)
    return;

  // Replace only the single comment character, keeping the rest of the text.
  AsmStr->replace(Pos, 1, ";");
}
1794 
1795 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1796 /// provided to seamlessly integrate with existing context.
1798  Function *F = CI->getCalledFunction();
1799  LLVMContext &C = CI->getContext();
1801  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1802 
1803  assert(F && "Intrinsic call is not direct?");
1804 
1805  if (!NewFn) {
1806  // Get the Function's name.
1807  StringRef Name = F->getName();
1808 
1809  assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1810  Name = Name.substr(5);
1811 
1812  bool IsX86 = Name.startswith("x86.");
1813  if (IsX86)
1814  Name = Name.substr(4);
1815  bool IsNVVM = Name.startswith("nvvm.");
1816  if (IsNVVM)
1817  Name = Name.substr(5);
1818 
1819  if (IsX86 && Name.startswith("sse4a.movnt.")) {
1820  Module *M = F->getParent();
1822  Elts.push_back(
1824  MDNode *Node = MDNode::get(C, Elts);
1825 
1826  Value *Arg0 = CI->getArgOperand(0);
1827  Value *Arg1 = CI->getArgOperand(1);
1828 
1829  // Nontemporal (unaligned) store of the 0'th element of the float/double
1830  // vector.
1831  Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1832  PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1833  Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1834  Value *Extract =
1835  Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1836 
1837  StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
1838  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1839 
1840  // Remove intrinsic.
1841  CI->eraseFromParent();
1842  return;
1843  }
1844 
1845  if (IsX86 && (Name.startswith("avx.movnt.") ||
1846  Name.startswith("avx512.storent."))) {
1847  Module *M = F->getParent();
1849  Elts.push_back(
1851  MDNode *Node = MDNode::get(C, Elts);
1852 
1853  Value *Arg0 = CI->getArgOperand(0);
1854  Value *Arg1 = CI->getArgOperand(1);
1855 
1856  // Convert the type of the pointer to a pointer to the stored type.
1857  Value *BC = Builder.CreateBitCast(Arg0,
1859  "cast");
1860  StoreInst *SI = Builder.CreateAlignedStore(
1861  Arg1, BC,
1862  Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
1863  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1864 
1865  // Remove intrinsic.
1866  CI->eraseFromParent();
1867  return;
1868  }
1869 
1870  if (IsX86 && Name == "sse2.storel.dq") {
1871  Value *Arg0 = CI->getArgOperand(0);
1872  Value *Arg1 = CI->getArgOperand(1);
1873 
1874  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
1875  Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1876  Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1877  Value *BC = Builder.CreateBitCast(Arg0,
1879  "cast");
1880  Builder.CreateAlignedStore(Elt, BC, Align(1));
1881 
1882  // Remove intrinsic.
1883  CI->eraseFromParent();
1884  return;
1885  }
1886 
1887  if (IsX86 && (Name.startswith("sse.storeu.") ||
1888  Name.startswith("sse2.storeu.") ||
1889  Name.startswith("avx.storeu."))) {
1890  Value *Arg0 = CI->getArgOperand(0);
1891  Value *Arg1 = CI->getArgOperand(1);
1892 
1893  Arg0 = Builder.CreateBitCast(Arg0,
1895  "cast");
1896  Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
1897 
1898  // Remove intrinsic.
1899  CI->eraseFromParent();
1900  return;
1901  }
1902 
1903  if (IsX86 && Name == "avx512.mask.store.ss") {
1904  Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1906  Mask, false);
1907 
1908  // Remove intrinsic.
1909  CI->eraseFromParent();
1910  return;
1911  }
1912 
1913  if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1914  // "avx512.mask.storeu." or "avx512.mask.store."
1915  bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1917  CI->getArgOperand(2), Aligned);
1918 
1919  // Remove intrinsic.
1920  CI->eraseFromParent();
1921  return;
1922  }
1923 
1924  Value *Rep;
1925  // Upgrade packed integer vector compare intrinsics to compare instructions.
1926  if (IsX86 && (Name.startswith("sse2.pcmp") ||
1927  Name.startswith("avx2.pcmp"))) {
1928  // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1929  bool CmpEq = Name[9] == 'e';
1930  Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1931  CI->getArgOperand(0), CI->getArgOperand(1));
1932  Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1933  } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1934  Type *ExtTy = Type::getInt32Ty(C);
1935  if (CI->getOperand(0)->getType()->isIntegerTy(8))
1936  ExtTy = Type::getInt64Ty(C);
1937  unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1938  ExtTy->getPrimitiveSizeInBits();
1939  Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1940  Rep = Builder.CreateVectorSplat(NumElts, Rep);
1941  } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1942  Name == "sse2.sqrt.sd")) {
1943  Value *Vec = CI->getArgOperand(0);
1944  Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1945  Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1946  Intrinsic::sqrt, Elt0->getType());
1947  Elt0 = Builder.CreateCall(Intr, Elt0);
1948  Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1949  } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1950  Name.startswith("sse2.sqrt.p") ||
1951  Name.startswith("sse.sqrt.p"))) {
1952  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1953  Intrinsic::sqrt,
1954  CI->getType()),
1955  {CI->getArgOperand(0)});
1956  } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1957  if (CI->getNumArgOperands() == 4 &&
1958  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1959  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1960  Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1961  : Intrinsic::x86_avx512_sqrt_pd_512;
1962 
1963  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1964  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1965  IID), Args);
1966  } else {
1967  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1968  Intrinsic::sqrt,
1969  CI->getType()),
1970  {CI->getArgOperand(0)});
1971  }
1972  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1973  CI->getArgOperand(1));
1974  } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1975  Name.startswith("avx512.ptestnm"))) {
1976  Value *Op0 = CI->getArgOperand(0);
1977  Value *Op1 = CI->getArgOperand(1);
1978  Value *Mask = CI->getArgOperand(2);
1979  Rep = Builder.CreateAnd(Op0, Op1);
1980  llvm::Type *Ty = Op0->getType();
1981  Value *Zero = llvm::Constant::getNullValue(Ty);
1982  ICmpInst::Predicate Pred =
1983  Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1984  Rep = Builder.CreateICmp(Pred, Rep, Zero);
1985  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1986  } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1987  unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
1988  ->getNumElements();
1989  Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1990  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1991  CI->getArgOperand(1));
1992  } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1993  unsigned NumElts = CI->getType()->getScalarSizeInBits();
1994  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1995  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1996  int Indices[64];
1997  for (unsigned i = 0; i != NumElts; ++i)
1998  Indices[i] = i;
1999 
2000  // First extract half of each vector. This gives better codegen than
2001  // doing it in a single shuffle.
2002  LHS = Builder.CreateShuffleVector(LHS, LHS,
2003  makeArrayRef(Indices, NumElts / 2));
2004  RHS = Builder.CreateShuffleVector(RHS, RHS,
2005  makeArrayRef(Indices, NumElts / 2));
2006  // Concat the vectors.
2007  // NOTE: Operands have to be swapped to match intrinsic definition.
2008  Rep = Builder.CreateShuffleVector(RHS, LHS,
2009  makeArrayRef(Indices, NumElts));
2010  Rep = Builder.CreateBitCast(Rep, CI->getType());
2011  } else if (IsX86 && Name == "avx512.kand.w") {
2012  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2013  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2014  Rep = Builder.CreateAnd(LHS, RHS);
2015  Rep = Builder.CreateBitCast(Rep, CI->getType());
2016  } else if (IsX86 && Name == "avx512.kandn.w") {
2017  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2018  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2019  LHS = Builder.CreateNot(LHS);
2020  Rep = Builder.CreateAnd(LHS, RHS);
2021  Rep = Builder.CreateBitCast(Rep, CI->getType());
2022  } else if (IsX86 && Name == "avx512.kor.w") {
2023  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2024  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2025  Rep = Builder.CreateOr(LHS, RHS);
2026  Rep = Builder.CreateBitCast(Rep, CI->getType());
2027  } else if (IsX86 && Name == "avx512.kxor.w") {
2028  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2029  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2030  Rep = Builder.CreateXor(LHS, RHS);
2031  Rep = Builder.CreateBitCast(Rep, CI->getType());
2032  } else if (IsX86 && Name == "avx512.kxnor.w") {
2033  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2034  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2035  LHS = Builder.CreateNot(LHS);
2036  Rep = Builder.CreateXor(LHS, RHS);
2037  Rep = Builder.CreateBitCast(Rep, CI->getType());
2038  } else if (IsX86 && Name == "avx512.knot.w") {
2039  Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2040  Rep = Builder.CreateNot(Rep);
2041  Rep = Builder.CreateBitCast(Rep, CI->getType());
2042  } else if (IsX86 &&
2043  (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2044  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2045  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2046  Rep = Builder.CreateOr(LHS, RHS);
2047  Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2048  Value *C;
2049  if (Name[14] == 'c')
2050  C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2051  else
2052  C = ConstantInt::getNullValue(Builder.getInt16Ty());
2053  Rep = Builder.CreateICmpEQ(Rep, C);
2054  Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2055  } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2056  Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2057  Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2058  Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2059  Type *I32Ty = Type::getInt32Ty(C);
2060  Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2061  ConstantInt::get(I32Ty, 0));
2062  Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2063  ConstantInt::get(I32Ty, 0));
2064  Value *EltOp;
2065  if (Name.contains(".add."))
2066  EltOp = Builder.CreateFAdd(Elt0, Elt1);
2067  else if (Name.contains(".sub."))
2068  EltOp = Builder.CreateFSub(Elt0, Elt1);
2069  else if (Name.contains(".mul."))
2070  EltOp = Builder.CreateFMul(Elt0, Elt1);
2071  else
2072  EltOp = Builder.CreateFDiv(Elt0, Elt1);
2073  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2074  ConstantInt::get(I32Ty, 0));
2075  } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
2076  // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2077  bool CmpEq = Name[16] == 'e';
2078  Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2079  } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
2080  Type *OpTy = CI->getArgOperand(0)->getType();
2081  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2082  Intrinsic::ID IID;
2083  switch (VecWidth) {
2084  default: llvm_unreachable("Unexpected intrinsic");
2085  case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2086  case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2087  case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2088  }
2089 
2090  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2091  { CI->getOperand(0), CI->getArgOperand(1) });
2092  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2093  } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
2094  Type *OpTy = CI->getArgOperand(0)->getType();
2095  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2096  unsigned EltWidth = OpTy->getScalarSizeInBits();
2097  Intrinsic::ID IID;
2098  if (VecWidth == 128 && EltWidth == 32)
2099  IID = Intrinsic::x86_avx512_fpclass_ps_128;
2100  else if (VecWidth == 256 && EltWidth == 32)
2101  IID = Intrinsic::x86_avx512_fpclass_ps_256;
2102  else if (VecWidth == 512 && EltWidth == 32)
2103  IID = Intrinsic::x86_avx512_fpclass_ps_512;
2104  else if (VecWidth == 128 && EltWidth == 64)
2105  IID = Intrinsic::x86_avx512_fpclass_pd_128;
2106  else if (VecWidth == 256 && EltWidth == 64)
2107  IID = Intrinsic::x86_avx512_fpclass_pd_256;
2108  else if (VecWidth == 512 && EltWidth == 64)
2109  IID = Intrinsic::x86_avx512_fpclass_pd_512;
2110  else
2111  llvm_unreachable("Unexpected intrinsic");
2112 
2113  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2114  { CI->getOperand(0), CI->getArgOperand(1) });
2115  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2116  } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
2118  CI->arg_operands().end());
2119  Type *OpTy = Args[0]->getType();
2120  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2121  unsigned EltWidth = OpTy->getScalarSizeInBits();
2122  Intrinsic::ID IID;
2123  if (VecWidth == 128 && EltWidth == 32)
2124  IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2125  else if (VecWidth == 256 && EltWidth == 32)
2126  IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2127  else if (VecWidth == 512 && EltWidth == 32)
2128  IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2129  else if (VecWidth == 128 && EltWidth == 64)
2130  IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2131  else if (VecWidth == 256 && EltWidth == 64)
2132  IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2133  else if (VecWidth == 512 && EltWidth == 64)
2134  IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2135  else
2136  llvm_unreachable("Unexpected intrinsic");
2137 
2139  if (VecWidth == 512)
2140  std::swap(Mask, Args.back());
2141  Args.push_back(Mask);
2142 
2143  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2144  Args);
2145  } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
2146  // Integer compare intrinsics.
2147  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2148  Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2149  } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2150  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2151  Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2152  } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2153  Name.startswith("avx512.cvtw2mask.") ||
2154  Name.startswith("avx512.cvtd2mask.") ||
2155  Name.startswith("avx512.cvtq2mask."))) {
2156  Value *Op = CI->getArgOperand(0);
2157  Value *Zero = llvm::Constant::getNullValue(Op->getType());
2158  Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2159  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2160  } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2161  Name == "ssse3.pabs.w.128" ||
2162  Name == "ssse3.pabs.d.128" ||
2163  Name.startswith("avx2.pabs") ||
2164  Name.startswith("avx512.mask.pabs"))) {
2165  Rep = upgradeAbs(Builder, *CI);
2166  } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2167  Name == "sse2.pmaxs.w" ||
2168  Name == "sse41.pmaxsd" ||
2169  Name.startswith("avx2.pmaxs") ||
2170  Name.startswith("avx512.mask.pmaxs"))) {
2172  } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2173  Name == "sse41.pmaxuw" ||
2174  Name == "sse41.pmaxud" ||
2175  Name.startswith("avx2.pmaxu") ||
2176  Name.startswith("avx512.mask.pmaxu"))) {
2178  } else if (IsX86 && (Name == "sse41.pminsb" ||
2179  Name == "sse2.pmins.w" ||
2180  Name == "sse41.pminsd" ||
2181  Name.startswith("avx2.pmins") ||
2182  Name.startswith("avx512.mask.pmins"))) {
2184  } else if (IsX86 && (Name == "sse2.pminu.b" ||
2185  Name == "sse41.pminuw" ||
2186  Name == "sse41.pminud" ||
2187  Name.startswith("avx2.pminu") ||
2188  Name.startswith("avx512.mask.pminu"))) {
2190  } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2191  Name == "avx2.pmulu.dq" ||
2192  Name == "avx512.pmulu.dq.512" ||
2193  Name.startswith("avx512.mask.pmulu.dq."))) {
2194  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2195  } else if (IsX86 && (Name == "sse41.pmuldq" ||
2196  Name == "avx2.pmul.dq" ||
2197  Name == "avx512.pmul.dq.512" ||
2198  Name.startswith("avx512.mask.pmul.dq."))) {
2199  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2200  } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2201  Name == "sse2.cvtsi2sd" ||
2202  Name == "sse.cvtsi642ss" ||
2203  Name == "sse2.cvtsi642sd")) {
2204  Rep = Builder.CreateSIToFP(
2205  CI->getArgOperand(1),
2206  cast<VectorType>(CI->getType())->getElementType());
2207  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2208  } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2209  Rep = Builder.CreateUIToFP(
2210  CI->getArgOperand(1),
2211  cast<VectorType>(CI->getType())->getElementType());
2212  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2213  } else if (IsX86 && Name == "sse2.cvtss2sd") {
2214  Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2215  Rep = Builder.CreateFPExt(
2216  Rep, cast<VectorType>(CI->getType())->getElementType());
2217  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2218  } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2219  Name == "sse2.cvtdq2ps" ||
2220  Name == "avx.cvtdq2.pd.256" ||
2221  Name == "avx.cvtdq2.ps.256" ||
2222  Name.startswith("avx512.mask.cvtdq2pd.") ||
2223  Name.startswith("avx512.mask.cvtudq2pd.") ||
2224  Name.startswith("avx512.mask.cvtdq2ps.") ||
2225  Name.startswith("avx512.mask.cvtudq2ps.") ||
2226  Name.startswith("avx512.mask.cvtqq2pd.") ||
2227  Name.startswith("avx512.mask.cvtuqq2pd.") ||
2228  Name == "avx512.mask.cvtqq2ps.256" ||
2229  Name == "avx512.mask.cvtqq2ps.512" ||
2230  Name == "avx512.mask.cvtuqq2ps.256" ||
2231  Name == "avx512.mask.cvtuqq2ps.512" ||
2232  Name == "sse2.cvtps2pd" ||
2233  Name == "avx.cvt.ps2.pd.256" ||
2234  Name == "avx512.mask.cvtps2pd.128" ||
2235  Name == "avx512.mask.cvtps2pd.256")) {
2236  auto *DstTy = cast<FixedVectorType>(CI->getType());
2237  Rep = CI->getArgOperand(0);
2238  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2239 
2240  unsigned NumDstElts = DstTy->getNumElements();
2241  if (NumDstElts < SrcTy->getNumElements()) {
2242  assert(NumDstElts == 2 && "Unexpected vector size");
2243  Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2244  }
2245 
2246  bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2247  bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2248  if (IsPS2PD)
2249  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2250  else if (CI->getNumArgOperands() == 4 &&
2251  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2252  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2253  Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2254  : Intrinsic::x86_avx512_sitofp_round;
2256  { DstTy, SrcTy });
2257  Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2258  } else {
2259  Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2260  : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2261  }
2262 
2263  if (CI->getNumArgOperands() >= 3)
2264  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2265  CI->getArgOperand(1));
2266  } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2267  Name.startswith("vcvtph2ps."))) {
2268  auto *DstTy = cast<FixedVectorType>(CI->getType());
2269  Rep = CI->getArgOperand(0);
2270  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2271  unsigned NumDstElts = DstTy->getNumElements();
2272  if (NumDstElts != SrcTy->getNumElements()) {
2273  assert(NumDstElts == 4 && "Unexpected vector size");
2274  Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2275  }
2276  Rep = Builder.CreateBitCast(
2277  Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2278  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2279  if (CI->getNumArgOperands() >= 3)
2280  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2281  CI->getArgOperand(1));
2282  } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
2283  Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2284  CI->getArgOperand(1), CI->getArgOperand(2),
2285  /*Aligned*/false);
2286  } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
2287  Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2288  CI->getArgOperand(1),CI->getArgOperand(2),
2289  /*Aligned*/true);
2290  } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2291  auto *ResultTy = cast<FixedVectorType>(CI->getType());
2292  Type *PtrTy = ResultTy->getElementType();
2293 
2294  // Cast the pointer to element type.
2295  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2297 
2298  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2299  ResultTy->getNumElements());
2300 
2301  Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2302  Intrinsic::masked_expandload,
2303  ResultTy);
2304  Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2305  } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2306  auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2307  Type *PtrTy = ResultTy->getElementType();
2308 
2309  // Cast the pointer to element type.
2310  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2312 
2313  Value *MaskVec =
2315  cast<FixedVectorType>(ResultTy)->getNumElements());
2316 
2317  Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2318  Intrinsic::masked_compressstore,
2319  ResultTy);
2320  Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2321  } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2322  Name.startswith("avx512.mask.expand."))) {
2323  auto *ResultTy = cast<FixedVectorType>(CI->getType());
2324 
2325  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2326  ResultTy->getNumElements());
2327 
2328  bool IsCompress = Name[12] == 'c';
2329  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2330  : Intrinsic::x86_avx512_mask_expand;
2331  Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2332  Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2333  MaskVec });
2334  } else if (IsX86 && Name.startswith("xop.vpcom")) {
2335  bool IsSigned;
2336  if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2337  Name.endswith("uq"))
2338  IsSigned = false;
2339  else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2340  Name.endswith("q"))
2341  IsSigned = true;
2342  else
2343  llvm_unreachable("Unknown suffix");
2344 
2345  unsigned Imm;
2346  if (CI->getNumArgOperands() == 3) {
2347  Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2348  } else {
2349  Name = Name.substr(9); // strip off "xop.vpcom"
2350  if (Name.startswith("lt"))
2351  Imm = 0;
2352  else if (Name.startswith("le"))
2353  Imm = 1;
2354  else if (Name.startswith("gt"))
2355  Imm = 2;
2356  else if (Name.startswith("ge"))
2357  Imm = 3;
2358  else if (Name.startswith("eq"))
2359  Imm = 4;
2360  else if (Name.startswith("ne"))
2361  Imm = 5;
2362  else if (Name.startswith("false"))
2363  Imm = 6;
2364  else if (Name.startswith("true"))
2365  Imm = 7;
2366  else
2367  llvm_unreachable("Unknown condition");
2368  }
2369 
2370  Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2371  } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2372  Value *Sel = CI->getArgOperand(2);
2373  Value *NotSel = Builder.CreateNot(Sel);
2374  Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2375  Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2376  Rep = Builder.CreateOr(Sel0, Sel1);
2377  } else if (IsX86 && (Name.startswith("xop.vprot") ||
2378  Name.startswith("avx512.prol") ||
2379  Name.startswith("avx512.mask.prol"))) {
2380  Rep = upgradeX86Rotate(Builder, *CI, false);
2381  } else if (IsX86 && (Name.startswith("avx512.pror") ||
2382  Name.startswith("avx512.mask.pror"))) {
2383  Rep = upgradeX86Rotate(Builder, *CI, true);
2384  } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2385  Name.startswith("avx512.mask.vpshld") ||
2386  Name.startswith("avx512.maskz.vpshld"))) {
2387  bool ZeroMask = Name[11] == 'z';
2388  Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2389  } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2390  Name.startswith("avx512.mask.vpshrd") ||
2391  Name.startswith("avx512.maskz.vpshrd"))) {
2392  bool ZeroMask = Name[11] == 'z';
2393  Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2394  } else if (IsX86 && Name == "sse42.crc32.64.8") {
2395  Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2396  Intrinsic::x86_sse42_crc32_32_8);
2397  Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2398  Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2399  Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2400  } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2401  Name.startswith("avx512.vbroadcast.s"))) {
2402  // Replace broadcasts with a series of insertelements.
2403  auto *VecTy = cast<FixedVectorType>(CI->getType());
2404  Type *EltTy = VecTy->getElementType();
2405  unsigned EltNum = VecTy->getNumElements();
2406  Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2407  EltTy->getPointerTo());
2408  Value *Load = Builder.CreateLoad(EltTy, Cast);
2409  Type *I32Ty = Type::getInt32Ty(C);
2410  Rep = UndefValue::get(VecTy);
2411  for (unsigned I = 0; I < EltNum; ++I)
2412  Rep = Builder.CreateInsertElement(Rep, Load,
2413  ConstantInt::get(I32Ty, I));
2414  } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2415  Name.startswith("sse41.pmovzx") ||
2416  Name.startswith("avx2.pmovsx") ||
2417  Name.startswith("avx2.pmovzx") ||
2418  Name.startswith("avx512.mask.pmovsx") ||
2419  Name.startswith("avx512.mask.pmovzx"))) {
2420  auto *DstTy = cast<FixedVectorType>(CI->getType());
2421  unsigned NumDstElts = DstTy->getNumElements();
2422 
2423  // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2424  SmallVector<int, 8> ShuffleMask(NumDstElts);
2425  for (unsigned i = 0; i != NumDstElts; ++i)
2426  ShuffleMask[i] = i;
2427 
2428  Value *SV =
2429  Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2430 
2431  bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2432  Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2433  : Builder.CreateZExt(SV, DstTy);
2434  // If there are 3 arguments, it's a masked intrinsic so we need a select.
2435  if (CI->getNumArgOperands() == 3)
2436  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2437  CI->getArgOperand(1));
2438  } else if (Name == "avx512.mask.pmov.qd.256" ||
2439  Name == "avx512.mask.pmov.qd.512" ||
2440  Name == "avx512.mask.pmov.wb.256" ||
2441  Name == "avx512.mask.pmov.wb.512") {
2442  Type *Ty = CI->getArgOperand(1)->getType();
2443  Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2444  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2445  CI->getArgOperand(1));
2446  } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2447  Name == "avx2.vbroadcasti128")) {
2448  // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2449  Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2450  unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2451  auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2452  Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2454  Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2455  if (NumSrcElts == 2)
2456  Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2457  else
2458  Rep = Builder.CreateShuffleVector(
2459  Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2460  } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2461  Name.startswith("avx512.mask.shuf.f"))) {
2462  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2463  Type *VT = CI->getType();
2464  unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2465  unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2466  unsigned ControlBitsMask = NumLanes - 1;
2467  unsigned NumControlBits = NumLanes / 2;
2468  SmallVector<int, 8> ShuffleMask(0);
2469 
2470  for (unsigned l = 0; l != NumLanes; ++l) {
2471  unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2472  // We actually need the other source.
2473  if (l >= NumLanes / 2)
2474  LaneMask += NumLanes;
2475  for (unsigned i = 0; i != NumElementsInLane; ++i)
2476  ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2477  }
2478  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2479  CI->getArgOperand(1), ShuffleMask);
2480  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2481  CI->getArgOperand(3));
2482  }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2483  Name.startswith("avx512.mask.broadcasti"))) {
2484  unsigned NumSrcElts =
2485  cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2486  ->getNumElements();
2487  unsigned NumDstElts =
2488  cast<FixedVectorType>(CI->getType())->getNumElements();
2489 
2490  SmallVector<int, 8> ShuffleMask(NumDstElts);
2491  for (unsigned i = 0; i != NumDstElts; ++i)
2492  ShuffleMask[i] = i % NumSrcElts;
2493 
2494  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2495  CI->getArgOperand(0),
2496  ShuffleMask);
2497  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2498  CI->getArgOperand(1));
2499  } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2500  Name.startswith("avx2.vbroadcast") ||
2501  Name.startswith("avx512.pbroadcast") ||
2502  Name.startswith("avx512.mask.broadcast.s"))) {
2503  // Replace vp?broadcasts with a vector shuffle.
2504  Value *Op = CI->getArgOperand(0);
2505  ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2506  Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2509  Rep = Builder.CreateShuffleVector(Op, M);
2510 
2511  if (CI->getNumArgOperands() == 3)
2512  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2513  CI->getArgOperand(1));
2514  } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2515  Name.startswith("avx2.padds.") ||
2516  Name.startswith("avx512.padds.") ||
2517  Name.startswith("avx512.mask.padds."))) {
2518  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2519  } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
2520  Name.startswith("avx2.psubs.") ||
2521  Name.startswith("avx512.psubs.") ||
2522  Name.startswith("avx512.mask.psubs."))) {
2523  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2524  } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2525  Name.startswith("avx2.paddus.") ||
2526  Name.startswith("avx512.mask.paddus."))) {
2527  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2528  } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
2529  Name.startswith("avx2.psubus.") ||
2530  Name.startswith("avx512.mask.psubus."))) {
2531  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2532  } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2534  CI->getArgOperand(1),
2535  CI->getArgOperand(2),
2536  CI->getArgOperand(3),
2537  CI->getArgOperand(4),
2538  false);
2539  } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2541  CI->getArgOperand(1),
2542  CI->getArgOperand(2),
2543  CI->getArgOperand(3),
2544  CI->getArgOperand(4),
2545  true);
2546  } else if (IsX86 && (Name == "sse2.psll.dq" ||
2547  Name == "avx2.psll.dq")) {
2548  // 128/256-bit shift left specified in bits.
2549  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2551  Shift / 8); // Shift is in bits.
2552  } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2553  Name == "avx2.psrl.dq")) {
2554  // 128/256-bit shift right specified in bits.
2555  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2557  Shift / 8); // Shift is in bits.
2558  } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2559  Name == "avx2.psll.dq.bs" ||
2560  Name == "avx512.psll.dq.512")) {
2561  // 128/256/512-bit shift left specified in bytes.
2562  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2564  } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2565  Name == "avx2.psrl.dq.bs" ||
2566  Name == "avx512.psrl.dq.512")) {
2567  // 128/256/512-bit shift right specified in bytes.
2568  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2570  } else if (IsX86 && (Name == "sse41.pblendw" ||
2571  Name.startswith("sse41.blendp") ||
2572  Name.startswith("avx.blend.p") ||
2573  Name == "avx2.pblendw" ||
2574  Name.startswith("avx2.pblendd."))) {
2575  Value *Op0 = CI->getArgOperand(0);
2576  Value *Op1 = CI->getArgOperand(1);
2577  unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2578  auto *VecTy = cast<FixedVectorType>(CI->getType());
2579  unsigned NumElts = VecTy->getNumElements();
2580 
2581  SmallVector<int, 16> Idxs(NumElts);
2582  for (unsigned i = 0; i != NumElts; ++i)
2583  Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2584 
2585  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2586  } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2587  Name == "avx2.vinserti128" ||
2588  Name.startswith("avx512.mask.insert"))) {
2589  Value *Op0 = CI->getArgOperand(0);
2590  Value *Op1 = CI->getArgOperand(1);
2591  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2592  unsigned DstNumElts =
2593  cast<FixedVectorType>(CI->getType())->getNumElements();
2594  unsigned SrcNumElts =
2595  cast<FixedVectorType>(Op1->getType())->getNumElements();
2596  unsigned Scale = DstNumElts / SrcNumElts;
2597 
2598  // Mask off the high bits of the immediate value; hardware ignores those.
2599  Imm = Imm % Scale;
2600 
2601  // Extend the second operand into a vector the size of the destination.
2602  SmallVector<int, 8> Idxs(DstNumElts);
2603  for (unsigned i = 0; i != SrcNumElts; ++i)
2604  Idxs[i] = i;
2605  for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2606  Idxs[i] = SrcNumElts;
2607  Rep = Builder.CreateShuffleVector(Op1, Idxs);
2608 
2609  // Insert the second operand into the first operand.
2610 
2611  // Note that there is no guarantee that instruction lowering will actually
2612  // produce a vinsertf128 instruction for the created shuffles. In
2613  // particular, the 0 immediate case involves no lane changes, so it can
2614  // be handled as a blend.
2615 
2616  // Example of shuffle mask for 32-bit elements:
2617  // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2618  // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2619 
2620  // First fill with identity mask.
2621  for (unsigned i = 0; i != DstNumElts; ++i)
2622  Idxs[i] = i;
2623  // Then replace the elements where we need to insert.
2624  for (unsigned i = 0; i != SrcNumElts; ++i)
2625  Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2626  Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2627 
2628  // If the intrinsic has a mask operand, handle that.
2629  if (CI->getNumArgOperands() == 5)
2630  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2631  CI->getArgOperand(3));
2632  } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2633  Name == "avx2.vextracti128" ||
2634  Name.startswith("avx512.mask.vextract"))) {
2635  Value *Op0 = CI->getArgOperand(0);
2636  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2637  unsigned DstNumElts =
2638  cast<FixedVectorType>(CI->getType())->getNumElements();
2639  unsigned SrcNumElts =
2640  cast<FixedVectorType>(Op0->getType())->getNumElements();
2641  unsigned Scale = SrcNumElts / DstNumElts;
2642 
2643  // Mask off the high bits of the immediate value; hardware ignores those.
2644  Imm = Imm % Scale;
2645 
2646  // Get indexes for the subvector of the input vector.
2647  SmallVector<int, 8> Idxs(DstNumElts);
2648  for (unsigned i = 0; i != DstNumElts; ++i) {
2649  Idxs[i] = i + (Imm * DstNumElts);
2650  }
2651  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2652 
2653  // If the intrinsic has a mask operand, handle that.
2654  if (CI->getNumArgOperands() == 4)
2655  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2656  CI->getArgOperand(2));
2657  } else if (!IsX86 && Name == "stackprotectorcheck") {
2658  Rep = nullptr;
2659  } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2660  Name.startswith("avx512.mask.perm.di."))) {
2661  Value *Op0 = CI->getArgOperand(0);
2662  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2663  auto *VecTy = cast<FixedVectorType>(CI->getType());
2664  unsigned NumElts = VecTy->getNumElements();
2665 
2666  SmallVector<int, 8> Idxs(NumElts);
2667  for (unsigned i = 0; i != NumElts; ++i)
2668  Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2669 
2670  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2671 
2672  if (CI->getNumArgOperands() == 4)
2673  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2674  CI->getArgOperand(2));
2675  } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2676  Name == "avx2.vperm2i128")) {
2677  // The immediate permute control byte looks like this:
2678  // [1:0] - select 128 bits from sources for low half of destination
2679  // [2] - ignore
2680  // [3] - zero low half of destination
2681  // [5:4] - select 128 bits from sources for high half of destination
2682  // [6] - ignore
2683  // [7] - zero high half of destination
2684 
2685  uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2686 
2687  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2688  unsigned HalfSize = NumElts / 2;
2689  SmallVector<int, 8> ShuffleMask(NumElts);
2690 
2691  // Determine which operand(s) are actually in use for this instruction.
2692  Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2693  Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2694 
2695  // If needed, replace operands based on zero mask.
2696  V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2697  V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2698 
2699  // Permute low half of result.
2700  unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2701  for (unsigned i = 0; i < HalfSize; ++i)
2702  ShuffleMask[i] = StartIndex + i;
2703 
2704  // Permute high half of result.
2705  StartIndex = (Imm & 0x10) ? HalfSize : 0;
2706  for (unsigned i = 0; i < HalfSize; ++i)
2707  ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2708 
2709  Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2710 
2711  } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2712  Name == "sse2.pshuf.d" ||
2713  Name.startswith("avx512.mask.vpermil.p") ||
2714  Name.startswith("avx512.mask.pshuf.d."))) {
2715  Value *Op0 = CI->getArgOperand(0);
2716  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2717  auto *VecTy = cast<FixedVectorType>(CI->getType());
2718  unsigned NumElts = VecTy->getNumElements();
2719  // Calculate the size of each index in the immediate.
2720  unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2721  unsigned IdxMask = ((1 << IdxSize) - 1);
2722 
2723  SmallVector<int, 8> Idxs(NumElts);
2724  // Lookup the bits for this element, wrapping around the immediate every
2725  // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2726  // to offset by the first index of each group.
2727  for (unsigned i = 0; i != NumElts; ++i)
2728  Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2729 
2730  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2731 
2732  if (CI->getNumArgOperands() == 4)
2733  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2734  CI->getArgOperand(2));
2735  } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2736  Name.startswith("avx512.mask.pshufl.w."))) {
2737  Value *Op0 = CI->getArgOperand(0);
2738  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2739  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2740 
2741  SmallVector<int, 16> Idxs(NumElts);
2742  for (unsigned l = 0; l != NumElts; l += 8) {
2743  for (unsigned i = 0; i != 4; ++i)
2744  Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2745  for (unsigned i = 4; i != 8; ++i)
2746  Idxs[i + l] = i + l;
2747  }
2748 
2749  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2750 
2751  if (CI->getNumArgOperands() == 4)
2752  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2753  CI->getArgOperand(2));
2754  } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2755  Name.startswith("avx512.mask.pshufh.w."))) {
2756  Value *Op0 = CI->getArgOperand(0);
2757  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2758  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2759 
2760  SmallVector<int, 16> Idxs(NumElts);
2761  for (unsigned l = 0; l != NumElts; l += 8) {
2762  for (unsigned i = 0; i != 4; ++i)
2763  Idxs[i + l] = i + l;
2764  for (unsigned i = 0; i != 4; ++i)
2765  Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2766  }
2767 
2768  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2769 
2770  if (CI->getNumArgOperands() == 4)
2771  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2772  CI->getArgOperand(2));
2773  } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2774  Value *Op0 = CI->getArgOperand(0);
2775  Value *Op1 = CI->getArgOperand(1);
2776  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2777  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2778 
2779  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2780  unsigned HalfLaneElts = NumLaneElts / 2;
2781 
2782  SmallVector<int, 16> Idxs(NumElts);
2783  for (unsigned i = 0; i != NumElts; ++i) {
2784  // Base index is the starting element of the lane.
2785  Idxs[i] = i - (i % NumLaneElts);
2786  // If we are half way through the lane switch to the other source.
2787  if ((i % NumLaneElts) >= HalfLaneElts)
2788  Idxs[i] += NumElts;
2789  // Now select the specific element by adding HalfLaneElts bits from
2790  // the immediate, wrapping around the immediate every 8 bits.
2791  Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2792  }
2793 
2794  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2795 
2796  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2797  CI->getArgOperand(3));
2798  } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2799  Name.startswith("avx512.mask.movshdup") ||
2800  Name.startswith("avx512.mask.movsldup"))) {
2801  Value *Op0 = CI->getArgOperand(0);
2802  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2803  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2804 
2805  unsigned Offset = 0;
2806  if (Name.startswith("avx512.mask.movshdup."))
2807  Offset = 1;
2808 
2809  SmallVector<int, 16> Idxs(NumElts);
2810  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2811  for (unsigned i = 0; i != NumLaneElts; i += 2) {
2812  Idxs[i + l + 0] = i + l + Offset;
2813  Idxs[i + l + 1] = i + l + Offset;
2814  }
2815 
2816  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2817 
2818  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2819  CI->getArgOperand(1));
2820  } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2821  Name.startswith("avx512.mask.unpckl."))) {
2822  Value *Op0 = CI->getArgOperand(0);
2823  Value *Op1 = CI->getArgOperand(1);
2824  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2825  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2826 
2827  SmallVector<int, 64> Idxs(NumElts);
2828  for (int l = 0; l != NumElts; l += NumLaneElts)
2829  for (int i = 0; i != NumLaneElts; ++i)
2830  Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2831 
2832  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2833 
2834  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2835  CI->getArgOperand(2));
2836  } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2837  Name.startswith("avx512.mask.unpckh."))) {
2838  Value *Op0 = CI->getArgOperand(0);
2839  Value *Op1 = CI->getArgOperand(1);
2840  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2841  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2842 
2843  SmallVector<int, 64> Idxs(NumElts);
2844  for (int l = 0; l != NumElts; l += NumLaneElts)
2845  for (int i = 0; i != NumLaneElts; ++i)
2846  Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2847 
2848  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2849 
2850  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2851  CI->getArgOperand(2));
2852  } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2853  Name.startswith("avx512.mask.pand."))) {
2854  VectorType *FTy = cast<VectorType>(CI->getType());
2855  VectorType *ITy = VectorType::getInteger(FTy);
2856  Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2857  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2858  Rep = Builder.CreateBitCast(Rep, FTy);
2859  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2860  CI->getArgOperand(2));
2861  } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2862  Name.startswith("avx512.mask.pandn."))) {
2863  VectorType *FTy = cast<VectorType>(CI->getType());
2864  VectorType *ITy = VectorType::getInteger(FTy);
2865  Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2866  Rep = Builder.CreateAnd(Rep,
2867  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2868  Rep = Builder.CreateBitCast(Rep, FTy);
2869  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2870  CI->getArgOperand(2));
2871  } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2872  Name.startswith("avx512.mask.por."))) {
2873  VectorType *FTy = cast<VectorType>(CI->getType());
2874  VectorType *ITy = VectorType::getInteger(FTy);
2875  Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2876  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2877  Rep = Builder.CreateBitCast(Rep, FTy);
2878  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2879  CI->getArgOperand(2));
2880  } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
2881  Name.startswith("avx512.mask.pxor."))) {
2882  VectorType *FTy = cast<VectorType>(CI->getType());
2883  VectorType *ITy = VectorType::getInteger(FTy);
2884  Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2885  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2886  Rep = Builder.CreateBitCast(Rep, FTy);
2887  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2888  CI->getArgOperand(2));
2889  } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2890  Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2891  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2892  CI->getArgOperand(2));
2893  } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2894  Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2895  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2896  CI->getArgOperand(2));
2897  } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2898  Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2899  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2900  CI->getArgOperand(2));
2901  } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2902  if (Name.endswith(".512")) {
2903  Intrinsic::ID IID;
2904  if (Name[17] == 's')
2905  IID = Intrinsic::x86_avx512_add_ps_512;
2906  else
2907  IID = Intrinsic::x86_avx512_add_pd_512;
2908 
2909  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2910  { CI->getArgOperand(0), CI->getArgOperand(1),
2911  CI->getArgOperand(4) });
2912  } else {
2913  Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2914  }
2915  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2916  CI->getArgOperand(2));
2917  } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2918  if (Name.endswith(".512")) {
2919  Intrinsic::ID IID;
2920  if (Name[17] == 's')
2921  IID = Intrinsic::x86_avx512_div_ps_512;
2922  else
2923  IID = Intrinsic::x86_avx512_div_pd_512;
2924 
2925  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2926  { CI->getArgOperand(0), CI->getArgOperand(1),
2927  CI->getArgOperand(4) });
2928  } else {
2929  Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2930  }
2931  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2932  CI->getArgOperand(2));
2933  } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2934  if (Name.endswith(".512")) {
2935  Intrinsic::ID IID;
2936  if (Name[17] == 's')
2937  IID = Intrinsic::x86_avx512_mul_ps_512;
2938  else
2939  IID = Intrinsic::x86_avx512_mul_pd_512;
2940 
2941  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2942  { CI->getArgOperand(0), CI->getArgOperand(1),
2943  CI->getArgOperand(4) });
2944  } else {
2945  Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2946  }
2947  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2948  CI->getArgOperand(2));
2949  } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2950  if (Name.endswith(".512")) {
2951  Intrinsic::ID IID;
2952  if (Name[17] == 's')
2953  IID = Intrinsic::x86_avx512_sub_ps_512;
2954  else
2955  IID = Intrinsic::x86_avx512_sub_pd_512;
2956 
2957  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2958  { CI->getArgOperand(0), CI->getArgOperand(1),
2959  CI->getArgOperand(4) });
2960  } else {
2961  Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2962  }
2963  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2964  CI->getArgOperand(2));
2965  } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
2966  Name.startswith("avx512.mask.min.p")) &&
2967  Name.drop_front(18) == ".512") {
2968  bool IsDouble = Name[17] == 'd';
2969  bool IsMin = Name[13] == 'i';
2970  static const Intrinsic::ID MinMaxTbl[2][2] = {
2971  { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
2972  { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
2973  };
2974  Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
2975 
2976  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2977  { CI->getArgOperand(0), CI->getArgOperand(1),
2978  CI->getArgOperand(4) });
2979  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2980  CI->getArgOperand(2));
2981  } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2982  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2983  Intrinsic::ctlz,
2984  CI->getType()),
2985  { CI->getArgOperand(0), Builder.getInt1(false) });
2986  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2987  CI->getArgOperand(1));
2988  } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2989  bool IsImmediate = Name[16] == 'i' ||
2990  (Name.size() > 18 && Name[18] == 'i');
2991  bool IsVariable = Name[16] == 'v';
2992  char Size = Name[16] == '.' ? Name[17] :
2993  Name[17] == '.' ? Name[18] :
2994  Name[18] == '.' ? Name[19] :
2995  Name[20];
2996 
2997  Intrinsic::ID IID;
2998  if (IsVariable && Name[17] != '.') {
2999  if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3000  IID = Intrinsic::x86_avx2_psllv_q;
3001  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3002  IID = Intrinsic::x86_avx2_psllv_q_256;
3003  else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3004  IID = Intrinsic::x86_avx2_psllv_d;
3005  else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3006  IID = Intrinsic::x86_avx2_psllv_d_256;
3007  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3008  IID = Intrinsic::x86_avx512_psllv_w_128;
3009  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3010  IID = Intrinsic::x86_avx512_psllv_w_256;
3011  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3012  IID = Intrinsic::x86_avx512_psllv_w_512;
3013  else
3014  llvm_unreachable("Unexpected size");
3015  } else if (Name.endswith(".128")) {
3016  if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3017  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3018  : Intrinsic::x86_sse2_psll_d;
3019  else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3020  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3021  : Intrinsic::x86_sse2_psll_q;
3022  else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3023  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3024  : Intrinsic::x86_sse2_psll_w;
3025  else
3026  llvm_unreachable("Unexpected size");
3027  } else if (Name.endswith(".256")) {
3028  if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3029  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3030  : Intrinsic::x86_avx2_psll_d;
3031  else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3032  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3033  : Intrinsic::x86_avx2_psll_q;
3034  else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3035  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3036  : Intrinsic::x86_avx2_psll_w;
3037  else
3038  llvm_unreachable("Unexpected size");
3039  } else {
3040  if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3041  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3042  IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
3043  Intrinsic::x86_avx512_psll_d_512;
3044  else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3045  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3046  IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
3047  Intrinsic::x86_avx512_psll_q_512;
3048  else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3049  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3050  : Intrinsic::x86_avx512_psll_w_512;
3051  else
3052  llvm_unreachable("Unexpected size");
3053  }
3054 
3055  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3056  } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
3057  bool IsImmediate = Name[16] == 'i' ||
3058  (Name.size() > 18 && Name[18] == 'i');
3059  bool IsVariable = Name[16] == 'v';
3060  char Size = Name[16] == '.' ? Name[17] :
3061  Name[17] == '.' ? Name[18] :
3062  Name[18] == '.' ? Name[19] :
3063  Name[20];
3064 
3065  Intrinsic::ID IID;
3066  if (IsVariable && Name[17] != '.') {
3067  if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3068  IID = Intrinsic::x86_avx2_psrlv_q;
3069  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3070  IID = Intrinsic::x86_avx2_psrlv_q_256;
3071  else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3072  IID = Intrinsic::x86_avx2_psrlv_d;
3073  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3074  IID = Intrinsic::x86_avx2_psrlv_d_256;
3075  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3076  IID = Intrinsic::x86_avx512_psrlv_w_128;
3077  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3078  IID = Intrinsic::x86_avx512_psrlv_w_256;
3079  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3080  IID = Intrinsic::x86_avx512_psrlv_w_512;
3081  else
3082  llvm_unreachable("Unexpected size");
3083  } else if (Name.endswith(".128")) {
3084  if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3085  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3086  : Intrinsic::x86_sse2_psrl_d;
3087  else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3088  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3089  : Intrinsic::x86_sse2_psrl_q;
3090  else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3091  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3092  : Intrinsic::x86_sse2_psrl_w;
3093  else
3094  llvm_unreachable("Unexpected size");
3095  } else if (Name.endswith(".256")) {
3096  if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3097  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3098  : Intrinsic::x86_avx2_psrl_d;
3099  else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3100  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3101  : Intrinsic::x86_avx2_psrl_q;
3102  else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3103  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3104  : Intrinsic::x86_avx2_psrl_w;
3105  else
3106  llvm_unreachable("Unexpected size");
3107  } else {
3108  if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3109  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3110  IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
3111  Intrinsic::x86_avx512_psrl_d_512;
3112  else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3113  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3114  IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
3115  Intrinsic::x86_avx512_psrl_q_512;
3116  else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3117  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3118  : Intrinsic::x86_avx512_psrl_w_512;
3119  else
3120  llvm_unreachable("Unexpected size");
3121  }
3122 
3123  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3124  } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
3125  bool IsImmediate = Name[16] == 'i' ||
3126  (Name.size() > 18 && Name[18] == 'i');
3127  bool IsVariable = Name[16] == 'v';
3128  char Size = Name[16] == '.' ? Name[17] :
3129  Name[17] == '.' ? Name[18] :
3130  Name[18] == '.' ? Name[19] :
3131  Name[20];
3132 
3133  Intrinsic::ID IID;
3134  if (IsVariable && Name[17] != '.') {
3135  if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3136  IID = Intrinsic::x86_avx2_psrav_d;
3137  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3138  IID = Intrinsic::x86_avx2_psrav_d_256;
3139  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3140  IID = Intrinsic::x86_avx512_psrav_w_128;
3141  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3142  IID = Intrinsic::x86_avx512_psrav_w_256;
3143  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3144  IID = Intrinsic::x86_avx512_psrav_w_512;
3145  else
3146  llvm_unreachable("Unexpected size");
3147  } else if (Name.endswith(".128")) {
3148  if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3149  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3150  : Intrinsic::x86_sse2_psra_d;
3151  else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3152  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3153  IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
3154  Intrinsic::x86_avx512_psra_q_128;
3155  else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3156  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3157  : Intrinsic::x86_sse2_psra_w;
3158  else
3159  llvm_unreachable("Unexpected size");
3160  } else if (Name.endswith(".256")) {
3161  if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3162  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3163  : Intrinsic::x86_avx2_psra_d;
3164  else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3165  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3166  IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
3167  Intrinsic::x86_avx512_psra_q_256;
3168  else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3169  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3170  : Intrinsic::x86_avx2_psra_w;
3171  else
3172  llvm_unreachable("Unexpected size");
3173  } else {
3174  if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3175  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3176  IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
3177  Intrinsic::x86_avx512_psra_d_512;
3178  else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3179  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3180  IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
3181  Intrinsic::x86_avx512_psra_q_512;
3182  else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3183  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3184  : Intrinsic::x86_avx512_psra_w_512;
3185  else
3186  llvm_unreachable("Unexpected size");
3187  }
3188 
3189  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3190  } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3191  Rep = upgradeMaskedMove(Builder, *CI);
3192  } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3193  Rep = UpgradeMaskToInt(Builder, *CI);
3194  } else if (IsX86 && Name.endswith(".movntdqa")) {
3195  Module *M = F->getParent();
3196  MDNode *Node = MDNode::get(
3198 
3199  Value *Ptr = CI->getArgOperand(0);
3200 
3201  // Convert the type of the pointer to a pointer to the stored type.
3202  Value *BC = Builder.CreateBitCast(
3203  Ptr, PointerType::getUnqual(CI->getType()), "cast");
3204  LoadInst *LI = Builder.CreateAlignedLoad(
3205  CI->getType(), BC,
3207  LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3208  Rep = LI;
3209  } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3210  Name.startswith("fma.vfmsub.") ||
3211  Name.startswith("fma.vfnmadd.") ||
3212  Name.startswith("fma.vfnmsub."))) {
3213  bool NegMul = Name[6] == 'n';
3214  bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3215  bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3216 
3217  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3218  CI->getArgOperand(2) };
3219 
3220  if (IsScalar) {
3221  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3222  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3223  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3224  }
3225 
3226  if (NegMul && !IsScalar)
3227  Ops[0] = Builder.CreateFNeg(Ops[0]);
3228  if (NegMul && IsScalar)
3229  Ops[1] = Builder.CreateFNeg(Ops[1]);
3230  if (NegAcc)
3231  Ops[2] = Builder.CreateFNeg(Ops[2]);
3232 
3233  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3234  Intrinsic::fma,
3235  Ops[0]->getType()),
3236  Ops);
3237 
3238  if (IsScalar)
3239  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3240  (uint64_t)0);
3241  } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3242  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3243  CI->getArgOperand(2) };
3244 
3245  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3246  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3247  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3248 
3249  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3250  Intrinsic::fma,
3251  Ops[0]->getType()),
3252  Ops);
3253 
3254  Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3255  Rep, (uint64_t)0);
3256  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3257  Name.startswith("avx512.maskz.vfmadd.s") ||
3258  Name.startswith("avx512.mask3.vfmadd.s") ||
3259  Name.startswith("avx512.mask3.vfmsub.s") ||
3260  Name.startswith("avx512.mask3.vfnmsub.s"))) {
3261  bool IsMask3 = Name[11] == '3';
3262  bool IsMaskZ = Name[11] == 'z';
3263  // Drop the "avx512.mask." to make it easier.
3264  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3265  bool NegMul = Name[2] == 'n';
3266  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3267 
3268  Value *A = CI->getArgOperand(0);
3269  Value *B = CI->getArgOperand(1);
3270  Value *C = CI->getArgOperand(2);
3271 
3272  if (NegMul && (IsMask3 || IsMaskZ))
3273  A = Builder.CreateFNeg(A);
3274  if (NegMul && !(IsMask3 || IsMaskZ))
3275  B = Builder.CreateFNeg(B);
3276  if (NegAcc)
3277  C = Builder.CreateFNeg(C);
3278 
3279  A = Builder.CreateExtractElement(A, (uint64_t)0);
3280  B = Builder.CreateExtractElement(B, (uint64_t)0);
3281  C = Builder.CreateExtractElement(C, (uint64_t)0);
3282 
3283  if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3284  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3285  Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3286 
3287  Intrinsic::ID IID;
3288  if (Name.back() == 'd')
3289  IID = Intrinsic::x86_avx512_vfmadd_f64;
3290  else
3291  IID = Intrinsic::x86_avx512_vfmadd_f32;
3293  Rep = Builder.CreateCall(FMA, Ops);
3294  } else {
3296  Intrinsic::fma,
3297  A->getType());
3298  Rep = Builder.CreateCall(FMA, { A, B, C });
3299  }
3300 
3301  Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3302  IsMask3 ? C : A;
3303 
3304  // For Mask3 with NegAcc, we need to create a new extractelement that
3305  // avoids the negation above.
3306  if (NegAcc && IsMask3)
3307  PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3308  (uint64_t)0);
3309 
3311  Rep, PassThru);
3312  Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3313  Rep, (uint64_t)0);
3314  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3315  Name.startswith("avx512.mask.vfnmadd.p") ||
3316  Name.startswith("avx512.mask.vfnmsub.p") ||
3317  Name.startswith("avx512.mask3.vfmadd.p") ||
3318  Name.startswith("avx512.mask3.vfmsub.p") ||
3319  Name.startswith("avx512.mask3.vfnmsub.p") ||
3320  Name.startswith("avx512.maskz.vfmadd.p"))) {
3321  bool IsMask3 = Name[11] == '3';
3322  bool IsMaskZ = Name[11] == 'z';
3323  // Drop the "avx512.mask." to make it easier.
3324  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3325  bool NegMul = Name[2] == 'n';
3326  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3327 
3328  Value *A = CI->getArgOperand(0);
3329  Value *B = CI->getArgOperand(1);
3330  Value *C = CI->getArgOperand(2);
3331 
3332  if (NegMul && (IsMask3 || IsMaskZ))
3333  A = Builder.CreateFNeg(A);
3334  if (NegMul && !(IsMask3 || IsMaskZ))
3335  B = Builder.CreateFNeg(B);
3336  if (NegAcc)
3337  C = Builder.CreateFNeg(C);
3338 
3339  if (CI->getNumArgOperands() == 5 &&
3340  (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3341  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3342  Intrinsic::ID IID;
3343  // Check the character before ".512" in string.
3344  if (Name[Name.size()-5] == 's')
3345  IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3346  else
3347  IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3348 
3349  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3350  { A, B, C, CI->getArgOperand(4) });
3351  } else {
3353  Intrinsic::fma,
3354  A->getType());
3355  Rep = Builder.CreateCall(FMA, { A, B, C });
3356  }
3357 
3358  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3359  IsMask3 ? CI->getArgOperand(2) :
3360  CI->getArgOperand(0);
3361 
3362  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3363  } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
3364  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3365  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3366  Intrinsic::ID IID;
3367  if (VecWidth == 128 && EltWidth == 32)
3368  IID = Intrinsic::x86_fma_vfmaddsub_ps;
3369  else if (VecWidth == 256 && EltWidth == 32)
3370  IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3371  else if (VecWidth == 128 && EltWidth == 64)
3372  IID = Intrinsic::x86_fma_vfmaddsub_pd;
3373  else if (VecWidth == 256 && EltWidth == 64)
3374  IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3375  else
3376  llvm_unreachable("Unexpected intrinsic");
3377 
3378  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3379  CI->getArgOperand(2) };
3380  Ops[2] = Builder.CreateFNeg(Ops[2]);
3381  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3382  Ops);
3383  } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3384  Name.startswith("avx512.mask3.vfmaddsub.p") ||
3385  Name.startswith("avx512.maskz.vfmaddsub.p") ||
3386  Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3387  bool IsMask3 = Name[11] == '3';
3388  bool IsMaskZ = Name[11] == 'z';
3389  // Drop the "avx512.mask." to make it easier.
3390  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3391  bool IsSubAdd = Name[3] == 's';
3392  if (CI->getNumArgOperands() == 5) {
3393  Intrinsic::ID IID;
3394  // Check the character before ".512" in string.
3395  if (Name[Name.size()-5] == 's')
3396  IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3397  else
3398  IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3399 
3400  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3401  CI->getArgOperand(2), CI->getArgOperand(4) };
3402  if (IsSubAdd)
3403  Ops[2] = Builder.CreateFNeg(Ops[2]);
3404 
3405  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3406  Ops);
3407  } else {
3408  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3409 
3410  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3411  CI->getArgOperand(2) };
3412 
3413  Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3414  Ops[0]->getType());
3415  Value *Odd = Builder.CreateCall(FMA, Ops);
3416  Ops[2] = Builder.CreateFNeg(Ops[2]);
3417  Value *Even = Builder.CreateCall(FMA, Ops);
3418 
3419  if (IsSubAdd)
3420  std::swap(Even, Odd);
3421 
3422  SmallVector<int, 32> Idxs(NumElts);
3423  for (int i = 0; i != NumElts; ++i)
3424  Idxs[i] = i + (i % 2) * NumElts;
3425 
3426  Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3427  }
3428 
3429  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3430  IsMask3 ? CI->getArgOperand(2) :
3431  CI->getArgOperand(0);
3432 
3433  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3434  } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3435  Name.startswith("avx512.maskz.pternlog."))) {
3436  bool ZeroMask = Name[11] == 'z';
3437  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3438  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3439  Intrinsic::ID IID;
3440  if (VecWidth == 128 && EltWidth == 32)
3441  IID = Intrinsic::x86_avx512_pternlog_d_128;
3442  else if (VecWidth == 256 && EltWidth == 32)
3443  IID = Intrinsic::x86_avx512_pternlog_d_256;
3444  else if (VecWidth == 512 && EltWidth == 32)
3445  IID = Intrinsic::x86_avx512_pternlog_d_512;
3446  else if (VecWidth == 128 && EltWidth == 64)
3447  IID = Intrinsic::x86_avx512_pternlog_q_128;
3448  else if (VecWidth == 256 && EltWidth == 64)
3449  IID = Intrinsic::x86_avx512_pternlog_q_256;
3450  else if (VecWidth == 512 && EltWidth == 64)
3451  IID = Intrinsic::x86_avx512_pternlog_q_512;
3452  else
3453  llvm_unreachable("Unexpected intrinsic");
3454 
3455  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3456  CI->getArgOperand(2), CI->getArgOperand(3) };
3457  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3458  Args);
3459  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3460  : CI->getArgOperand(0);
3461  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3462  } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3463  Name.startswith("avx512.maskz.vpmadd52"))) {
3464  bool ZeroMask = Name[11] == 'z';
3465  bool High = Name[20] == 'h' || Name[21] == 'h';
3466  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3467  Intrinsic::ID IID;
3468  if (VecWidth == 128 && !High)
3469  IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3470  else if (VecWidth == 256 && !High)
3471  IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3472  else if (VecWidth == 512 && !High)
3473  IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3474  else if (VecWidth == 128 && High)
3475  IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3476  else if (VecWidth == 256 && High)
3477  IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3478  else if (VecWidth == 512 && High)
3479  IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3480  else
3481  llvm_unreachable("Unexpected intrinsic");
3482 
3483  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3484  CI->getArgOperand(2) };
3485  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3486  Args);
3487  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3488  : CI->getArgOperand(0);
3489  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3490  } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3491  Name.startswith("avx512.mask.vpermt2var.") ||
3492  Name.startswith("avx512.maskz.vpermt2var."))) {
3493  bool ZeroMask = Name[11] == 'z';
3494  bool IndexForm = Name[17] == 'i';
3495  Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3496  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3497  Name.startswith("avx512.maskz.vpdpbusd.") ||
3498  Name.startswith("avx512.mask.vpdpbusds.") ||
3499  Name.startswith("avx512.maskz.vpdpbusds."))) {
3500  bool ZeroMask = Name[11] == 'z';
3501  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3502  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3503  Intrinsic::ID IID;
3504  if (VecWidth == 128 && !IsSaturating)
3505  IID = Intrinsic::x86_avx512_vpdpbusd_128;
3506  else if (VecWidth == 256 && !IsSaturating)
3507  IID = Intrinsic::x86_avx512_vpdpbusd_256;
3508  else if (VecWidth == 512 && !IsSaturating)
3509  IID = Intrinsic::x86_avx512_vpdpbusd_512;
3510  else if (VecWidth == 128 && IsSaturating)
3511  IID = Intrinsic::x86_avx512_vpdpbusds_128;
3512  else if (VecWidth == 256 && IsSaturating)
3513  IID = Intrinsic::x86_avx512_vpdpbusds_256;
3514  else if (VecWidth == 512 && IsSaturating)
3515  IID = Intrinsic::x86_avx512_vpdpbusds_512;
3516  else
3517  llvm_unreachable("Unexpected intrinsic");
3518 
3519  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3520  CI->getArgOperand(2) };
3521  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3522  Args);
3523  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3524  : CI->getArgOperand(0);
3525  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3526  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3527  Name.startswith("avx512.maskz.vpdpwssd.") ||
3528  Name.startswith("avx512.mask.vpdpwssds.") ||
3529  Name.startswith("avx512.maskz.vpdpwssds."))) {
3530  bool ZeroMask = Name[11] == 'z';
3531  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3532  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3533  Intrinsic::ID IID;
3534  if (VecWidth == 128 && !IsSaturating)
3535  IID = Intrinsic::x86_avx512_vpdpwssd_128;
3536  else if (VecWidth == 256 && !IsSaturating)
3537  IID = Intrinsic::x86_avx512_vpdpwssd_256;
3538  else if (VecWidth == 512 && !IsSaturating)
3539  IID = Intrinsic::x86_avx512_vpdpwssd_512;
3540  else if (VecWidth == 128 && IsSaturating)
3541  IID = Intrinsic::x86_avx512_vpdpwssds_128;
3542  else if (VecWidth == 256 && IsSaturating)
3543  IID = Intrinsic::x86_avx512_vpdpwssds_256;
3544  else if (VecWidth == 512 && IsSaturating)
3545  IID = Intrinsic::x86_avx512_vpdpwssds_512;
3546  else
3547  llvm_unreachable("Unexpected intrinsic");
3548 
3549  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3550  CI->getArgOperand(2) };
3551  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3552  Args);
3553  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3554  : CI->getArgOperand(0);
3555  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3556  } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3557  Name == "addcarry.u32" || Name == "addcarry.u64" ||
3558  Name == "subborrow.u32" || Name == "subborrow.u64")) {
3559  Intrinsic::ID IID;
3560  if (Name[0] == 'a' && Name.back() == '2')
3561  IID = Intrinsic::x86_addcarry_32;
3562  else if (Name[0] == 'a' && Name.back() == '4')
3563  IID = Intrinsic::x86_addcarry_64;
3564  else if (Name[0] == 's' && Name.back() == '2')
3565  IID = Intrinsic::x86_subborrow_32;
3566  else if (Name[0] == 's' && Name.back() == '4')
3567  IID = Intrinsic::x86_subborrow_64;
3568  else
3569  llvm_unreachable("Unexpected intrinsic");
3570 
3571  // Make a call with 3 operands.
3572  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3573  CI->getArgOperand(2)};
3574  Value *NewCall = Builder.CreateCall(
3576  Args);
3577 
3578  // Extract the second result and store it.
3579  Value *Data = Builder.CreateExtractValue(NewCall, 1);
3580  // Cast the pointer to the right type.
3581  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3582  llvm::PointerType::getUnqual(Data->getType()));
3583  Builder.CreateAlignedStore(Data, Ptr, Align(1));
3584  // Replace the original call result with the first result of the new call.
3585  Value *CF = Builder.CreateExtractValue(NewCall, 0);
3586 
3587  CI->replaceAllUsesWith(CF);
3588  Rep = nullptr;
3589  } else if (IsX86 && Name.startswith("avx512.mask.") &&
3590  upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3591  // Rep will be updated by the call in the condition.
3592  } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3593  Value *Arg = CI->getArgOperand(0);
3594  Value *Neg = Builder.CreateNeg(Arg, "neg");
3595  Value *Cmp = Builder.CreateICmpSGE(
3596  Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3597  Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3598  } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3599  Name.startswith("atomic.load.add.f64.p"))) {
3600  Value *Ptr = CI->getArgOperand(0);
3601  Value *Val = CI->getArgOperand(1);
3602  Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
3604  } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3605  Name == "max.ui" || Name == "max.ull")) {
3606  Value *Arg0 = CI->getArgOperand(0);
3607  Value *Arg1 = CI->getArgOperand(1);
3608  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3609  ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3610  : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3611  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3612  } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3613  Name == "min.ui" || Name == "min.ull")) {
3614  Value *Arg0 = CI->getArgOperand(0);
3615  Value *Arg1 = CI->getArgOperand(1);
3616  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3617  ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3618  : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3619  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3620  } else if (IsNVVM && Name == "clz.ll") {
3621  // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3622  Value *Arg = CI->getArgOperand(0);
3623  Value *Ctlz = Builder.CreateCall(
3624  Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3625  {Arg->getType()}),
3626  {Arg, Builder.getFalse()}, "ctlz");
3627  Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3628  } else if (IsNVVM && Name == "popc.ll") {
3629  // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
3630  // i64.
3631  Value *Arg = CI->getArgOperand(0);
3632  Value *Popc = Builder.CreateCall(
3633  Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3634  {Arg->getType()}),
3635  Arg, "ctpop");
3636  Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3637  } else if (IsNVVM && Name == "h2f") {
3638  Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3639  F->getParent(), Intrinsic::convert_from_fp16,
3640  {Builder.getFloatTy()}),
3641  CI->getArgOperand(0), "h2f");
3642  } else {
3643  llvm_unreachable("Unknown function for CallInst upgrade.");
3644  }
3645 
3646  if (Rep)
3647  CI->replaceAllUsesWith(Rep);
3648  CI->eraseFromParent();
3649  return;
3650  }
3651 
3652  const auto &DefaultCase = [&NewFn, &CI]() -> void {
3653  // Handle generic mangling change, but nothing else
3654  assert(
3655  (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3656  "Unknown function for CallInst upgrade and isn't just a name change");
3657  CI->setCalledFunction(NewFn);
3658  };
3659  CallInst *NewCall = nullptr;
3660  switch (NewFn->getIntrinsicID()) {
3661  default: {
3662  DefaultCase();
3663  return;
3664  }
3665  case Intrinsic::arm_neon_vld1:
3666  case Intrinsic::arm_neon_vld2:
3667  case Intrinsic::arm_neon_vld3:
3668  case Intrinsic::arm_neon_vld4:
3669  case Intrinsic::arm_neon_vld2lane:
3670  case Intrinsic::arm_neon_vld3lane:
3671  case Intrinsic::arm_neon_vld4lane:
3672  case Intrinsic::arm_neon_vst1:
3673  case Intrinsic::arm_neon_vst2:
3674  case Intrinsic::arm_neon_vst3:
3675  case Intrinsic::arm_neon_vst4:
3676  case Intrinsic::arm_neon_vst2lane:
3677  case Intrinsic::arm_neon_vst3lane:
3678  case Intrinsic::arm_neon_vst4lane: {
3680  CI->arg_operands().end());
3681  NewCall = Builder.CreateCall(NewFn, Args);
3682  break;
3683  }
3684 
3685  case Intrinsic::arm_neon_bfdot:
3686  case Intrinsic::arm_neon_bfmmla:
3687  case Intrinsic::arm_neon_bfmlalb:
3688  case Intrinsic::arm_neon_bfmlalt:
3689  case Intrinsic::aarch64_neon_bfdot:
3690  case Intrinsic::aarch64_neon_bfmmla:
3691  case Intrinsic::aarch64_neon_bfmlalb:
3692  case Intrinsic::aarch64_neon_bfmlalt: {
3694  assert(CI->getNumArgOperands() == 3 &&
3695  "Mismatch between function args and call args");
3696  size_t OperandWidth =
3698  assert((OperandWidth == 64 || OperandWidth == 128) &&
3699  "Unexpected operand width");
3700  Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
3701  auto Iter = CI->arg_operands().begin();
3702  Args.push_back(*Iter++);
3703  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3704  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3705  NewCall = Builder.CreateCall(NewFn, Args);
3706  break;
3707  }
3708 
3709  case Intrinsic::bitreverse:
3710  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3711  break;
3712 
3713  case Intrinsic::ctlz:
3714  case Intrinsic::cttz:
3715  assert(CI->getNumArgOperands() == 1 &&
3716  "Mismatch between function args and call args");
3717  NewCall =
3718  Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3719  break;
3720 
3721  case Intrinsic::objectsize: {
3722  Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3723  ? Builder.getFalse()
3724  : CI->getArgOperand(2);
3725  Value *Dynamic =
3726  CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3727  NewCall = Builder.CreateCall(
3728  NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3729  break;
3730  }
3731 
3732  case Intrinsic::ctpop:
3733  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3734  break;
3735 
3736  case Intrinsic::convert_from_fp16:
3737  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3738  break;
3739 
3740  case Intrinsic::dbg_value:
3741  // Upgrade from the old version that had an extra offset argument.
3742  assert(CI->getNumArgOperands() == 4);
3743  // Drop nonzero offsets instead of attempting to upgrade them.
3744  if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3745  if (Offset->isZeroValue()) {
3746  NewCall = Builder.CreateCall(
3747  NewFn,
3748  {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3749  break;
3750  }
3751  CI->eraseFromParent();
3752  return;
3753 
3754  case Intrinsic::ptr_annotation:
3755  // Upgrade from versions that lacked the annotation attribute argument.
3756  assert(CI->getNumArgOperands() == 4 &&
3757  "Before LLVM 12.0 this intrinsic took four arguments");
3758  // Create a new call with an added null annotation attribute argument.
3759  NewCall = Builder.CreateCall(
3760  NewFn,
3761  {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3762  CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3763  NewCall->takeName(CI);
3764  CI->replaceAllUsesWith(NewCall);
3765  CI->eraseFromParent();
3766  return;
3767 
3768  case Intrinsic::var_annotation:
3769  // Upgrade from versions that lacked the annotation attribute argument.
3770  assert(CI->getNumArgOperands() == 4 &&
3771  "Before LLVM 12.0 this intrinsic took four arguments");
3772  // Create a new call with an added null annotation attribute argument.
3773  NewCall = Builder.CreateCall(
3774  NewFn,
3775  {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3776  CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3777  CI->eraseFromParent();
3778  return;
3779 
3780  case Intrinsic::x86_xop_vfrcz_ss:
3781  case Intrinsic::x86_xop_vfrcz_sd:
3782  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3783  break;
3784 
3785  case Intrinsic::x86_xop_vpermil2pd:
3786  case Intrinsic::x86_xop_vpermil2ps:
3787  case Intrinsic::x86_xop_vpermil2pd_256:
3788  case Intrinsic::x86_xop_vpermil2ps_256: {
3790  CI->arg_operands().end());
3791  VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3792  VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3793  Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3794  NewCall = Builder.CreateCall(NewFn, Args);
3795  break;
3796  }
3797 
3798  case Intrinsic::x86_sse41_ptestc:
3799  case Intrinsic::x86_sse41_ptestz:
3800  case Intrinsic::x86_sse41_ptestnzc: {
3801  // The arguments for these intrinsics used to be v4f32, and changed
3802  // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3803  // So, the only thing required is a bitcast for both arguments.
3804  // First, check the arguments have the old type.
3805  Value *Arg0 = CI->getArgOperand(0);
3806  if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
3807  return;
3808 
3809  // Old intrinsic, add bitcasts
3810  Value *Arg1 = CI->getArgOperand(1);
3811 
3812  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
3813 
3814  Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3815  Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3816 
3817  NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3818  break;
3819  }
3820 
3821  case Intrinsic::x86_rdtscp: {
3822  // This used to take 1 argument. If we have no arguments, it is already
3823  // upgraded.
3824  if (CI->getNumOperands() == 0)
3825  return;
3826 
3827  NewCall = Builder.CreateCall(NewFn);
3828  // Extract the second result and store it.
3829  Value *Data = Builder.CreateExtractValue(NewCall, 1);
3830  // Cast the pointer to the right type.
3831  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3832  llvm::PointerType::getUnqual(Data->getType()));
3833  Builder.CreateAlignedStore(Data, Ptr, Align(1));
3834  // Replace the original call result with the first result of the new call.
3835  Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3836 
3837  NewCall->takeName(CI);
3838  CI->replaceAllUsesWith(TSC);
3839  CI->eraseFromParent();
3840  return;
3841  }
3842 
3843  case Intrinsic::x86_sse41_insertps:
3844  case Intrinsic::x86_sse41_dppd:
3845  case Intrinsic::x86_sse41_dpps:
3846  case Intrinsic::x86_sse41_mpsadbw:
3847  case Intrinsic::x86_avx_dp_ps_256:
3848  case Intrinsic::x86_avx2_mpsadbw: {
3849  // Need to truncate the last argument from i32 to i8 -- this argument models
3850  // an inherently 8-bit immediate operand to these x86 instructions.
3852  CI->arg_operands().end());
3853 
3854  // Replace the last argument with a trunc.
3855  Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3856  NewCall = Builder.CreateCall(NewFn, Args);
3857  break;
3858  }
3859 
3860  case Intrinsic::x86_avx512_mask_cmp_pd_128:
3861  case Intrinsic::x86_avx512_mask_cmp_pd_256:
3862  case Intrinsic::x86_avx512_mask_cmp_pd_512:
3863  case Intrinsic::x86_avx512_mask_cmp_ps_128:
3864  case Intrinsic::x86_avx512_mask_cmp_ps_256:
3865  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
3867  CI->arg_operands().end());
3868  unsigned NumElts =
3869  cast<FixedVectorType>(Args[0]->getType())->getNumElements();
3870  Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
3871 
3872  NewCall = Builder.CreateCall(NewFn, Args);
3873  Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
3874 
3875  NewCall->takeName(CI);
3876  CI->replaceAllUsesWith(Res);
3877  CI->eraseFromParent();
3878  return;
3879  }
3880 
3881  case Intrinsic::thread_pointer: {
3882  NewCall = Builder.CreateCall(NewFn, {});
3883  break;
3884  }
3885 
3886  case Intrinsic::invariant_start:
3887  case Intrinsic::invariant_end:
3888  case Intrinsic::masked_load:
3889  case Intrinsic::masked_store:
3890  case Intrinsic::masked_gather:
3891  case Intrinsic::masked_scatter: {
3893  CI->arg_operands().end());
3894  NewCall = Builder.CreateCall(NewFn, Args);
3895  break;
3896  }
3897 
3898  case Intrinsic::memcpy:
3899  case Intrinsic::memmove:
3900  case Intrinsic::memset: {
3901  // We have to make sure that the call signature is what we're expecting.
3902  // We only want to change the old signatures by removing the alignment arg:
3903  // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3904  // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3905  // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3906  // -> @llvm.memset...(i8*, i8, i[32|64], i1)
3907  // Note: i8*'s in the above can be any pointer type
3908  if (CI->getNumArgOperands() != 5) {
3909  DefaultCase();
3910  return;
3911  }
3912  // Remove alignment argument (3), and add alignment attributes to the
3913  // dest/src pointers.
3914  Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3915  CI->getArgOperand(2), CI->getArgOperand(4)};
3916  NewCall = Builder.CreateCall(NewFn, Args);
3917  auto *MemCI = cast<MemIntrinsic>(NewCall);
3918  // All mem intrinsics support dest alignment.
3919  const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3920  MemCI->setDestAlignment(Align->getMaybeAlignValue());
3921  // Memcpy/Memmove also support source alignment.
3922  if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3923  MTI->setSourceAlignment(Align->getMaybeAlignValue());
3924  break;
3925  }
3926  }
3927  assert(NewCall && "Should have either set this variable or returned through "
3928  "the default case");
3929  NewCall->takeName(CI);
3930  CI->replaceAllUsesWith(NewCall);
3931  CI->eraseFromParent();
3932 }
3933 
3935  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3936 
3937  // Check if this function should be upgraded and get the replacement function
3938  // if there is one.
3939  Function *NewFn;
3940  if (UpgradeIntrinsicFunction(F, NewFn)) {
3941  // Replace all users of the old function with the new function or new
3942  // instructions. This is not a range loop because the call is deleted.
3943  for (User *U : make_early_inc_range(F->users()))
3944  if (CallInst *CI = dyn_cast<CallInst>(U))
3945  UpgradeIntrinsicCall(CI, NewFn);
3946 
3947  // Remove old function, no longer used, from the module.
3948  F->eraseFromParent();
3949  }
3950 }
3951 
3953  // Check if the tag uses struct-path aware TBAA format.
3954  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3955  return &MD;
3956 
3957  auto &Context = MD.getContext();
3958  if (MD.getNumOperands() == 3) {
3959  Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3960  MDNode *ScalarType = MDNode::get(Context, Elts);
3961  // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3962  Metadata *Elts2[] = {ScalarType, ScalarType,
3965  MD.getOperand(2)};
3966  return MDNode::get(Context, Elts2);
3967  }
3968  // Create a MDNode <MD, MD, offset 0>
3971  return MDNode::get(Context, Elts);
3972 }
3973 
3974 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3975  Instruction *&Temp) {
3976  if (Opc != Instruction::BitCast)
3977  return nullptr;
3978 
3979  Temp = nullptr;
3980  Type *SrcTy = V->getType();
3981  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3982  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3983  LLVMContext &Context = V->getContext();
3984 
3985  // We have no information about target data layout, so we assume that
3986  // the maximum pointer size is 64bit.
3987  Type *MidTy = Type::getInt64Ty(Context);
3988  Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3989 
3990  return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3991  }
3992 
3993  return nullptr;
3994 }
3995 
3996 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3997  if (Opc != Instruction::BitCast)
3998  return nullptr;
3999 
4000  Type *SrcTy = C->getType();
4001  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4002  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4003  LLVMContext &Context = C->getContext();
4004 
4005  // We have no information about target data layout, so we assume that
4006  // the maximum pointer size is 64bit.
4007  Type *MidTy = Type::getInt64Ty(Context);
4008 
4010  DestTy);
4011  }
4012 
4013  return nullptr;
4014 }
4015 
4016 /// Check the debug info version number, if it is out-dated, drop the debug
4017 /// info. Return true if module is modified.
4021  bool BrokenDebugInfo = false;
4022  if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4023  report_fatal_error("Broken module found, compilation aborted!");
4024  if (!BrokenDebugInfo)
4025  // Everything is ok.
4026  return false;
4027  else {
4028  // Diagnose malformed debug info.
4030  M.getContext().diagnose(Diag);
4031  }
4032  }
4033  bool Modified = StripDebugInfo(M);
4035  // Diagnose a version mismatch.
4037  M.getContext().diagnose(DiagVersion);
4038  }
4039  return Modified;
4040 }
4041 
4042 /// This checks for objc retain release marker which should be upgraded. It
4043 /// returns true if module is modified.
4045  bool Changed = false;
4046  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4047  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4048  if (ModRetainReleaseMarker) {
4049  MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4050  if (Op) {
4051  MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4052  if (ID) {
4053  SmallVector<StringRef, 4> ValueComp;
4054  ID->getString().split(ValueComp, "#");
4055  if (ValueComp.size() == 2) {
4056  std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4057  ID = MDString::get(M.getContext(), NewValue);
4058  }
4059  M.addModuleFlag(Module::Error, MarkerKey, ID);
4060  M.eraseNamedMetadata(ModRetainReleaseMarker);
4061  Changed = true;
4062  }
4063  }
4064  }
4065  return Changed;
4066 }
4067 
4069  // This lambda converts normal function calls to ARC runtime functions to
4070  // intrinsic calls.
4071  auto UpgradeToIntrinsic = [&](const char *OldFunc,
4072  llvm::Intrinsic::ID IntrinsicFunc) {
4073  Function *Fn = M.getFunction(OldFunc);
4074 
4075  if (!Fn)
4076  return;
4077 
4078  Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4079 
4080  for (User *U : make_early_inc_range(Fn->users())) {
4081  CallInst *CI = dyn_cast<CallInst>(U);
4082  if (!CI || CI->getCalledFunction() != Fn)
4083  continue;
4084 
4085  IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4086  FunctionType *NewFuncTy = NewFn->getFunctionType();
4088 
4089  // Don't upgrade the intrinsic if it's not valid to bitcast the return
4090  // value to the return type of the old function.
4091  if (NewFuncTy->getReturnType() != CI->getType() &&
4092  !CastInst::castIsValid(Instruction::BitCast, CI,
4093  NewFuncTy->getReturnType()))
4094  continue;
4095 
4096  bool InvalidCast = false;
4097 
4098  for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
4099  Value *Arg = CI->getArgOperand(I);
4100 
4101  // Bitcast argument to the parameter type of the new function if it's
4102  // not a variadic argument.
4103  if (I < NewFuncTy->getNumParams()) {
4104  // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4105  // to the parameter type of the new function.
4106  if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4107  NewFuncTy->getParamType(I))) {
4108  InvalidCast = true;
4109  break;
4110  }
4111  Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4112  }
4113  Args.push_back(Arg);
4114  }
4115 
4116  if (InvalidCast)
4117  continue;
4118 
4119  // Create a call instruction that calls the new function.
4120  CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4121  NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4122  NewCall->takeName(CI);
4123 
4124  // Bitcast the return value back to the type of the old call.
4125  Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4126 
4127  if (!CI->use_empty())
4128  CI->replaceAllUsesWith(NewRetVal);
4129  CI->eraseFromParent();
4130  }
4131 
4132  if (Fn->use_empty())
4133  Fn->eraseFromParent();
4134  };
4135 
4136  // Unconditionally convert a call to "clang.arc.use" to a call to
4137  // "llvm.objc.clang.arc.use".
4138  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
4139 
4140  // Upgrade the retain release marker. If there is no need to upgrade
4141  // the marker, that means either the module is already new enough to contain
4142  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
4144  return;
4145 
4146  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
4147  {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
4148  {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
4149  {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
4150  {"objc_autoreleaseReturnValue",
4151  llvm::Intrinsic::objc_autoreleaseReturnValue},
4152  {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
4153  {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
4154  {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
4155  {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
4156  {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
4157  {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
4158  {"objc_release", llvm::Intrinsic::objc_release},
4159  {"objc_retain", llvm::Intrinsic::objc_retain},
4160  {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
4161  {"objc_retainAutoreleaseReturnValue",
4162  llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
4163  {"objc_retainAutoreleasedReturnValue",
4164  llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
4165  {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
4166  {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
4167  {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
4168  {"objc_unsafeClaimAutoreleasedReturnValue",
4169  llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
4170  {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
4171  {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
4172  {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
4173  {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
4174  {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
4175  {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
4176  {"objc_arc_annotation_topdown_bbstart",
4177  llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
4178  {"objc_arc_annotation_topdown_bbend",
4179  llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
4180  {"objc_arc_annotation_bottomup_bbstart",
4181  llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
4182  {"objc_arc_annotation_bottomup_bbend",
4183  llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
4184 
4185  for (auto &I : RuntimeFuncs)
4186  UpgradeToIntrinsic(I.first, I.second);
4187 }
4188 
4190  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4191  if (!ModFlags)
4192  return false;
4193 
4194  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4195  bool HasSwiftVersionFlag = false;
4196  uint8_t SwiftMajorVersion, SwiftMinorVersion;
4197  uint32_t SwiftABIVersion;
4198  auto Int8Ty = Type::getInt8Ty(M.getContext());
4199  auto Int32Ty = Type::getInt32Ty(M.getContext());
4200 
4201  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4202  MDNode *Op = ModFlags->getOperand(I);
4203  if (Op->getNumOperands() != 3)
4204  continue;
4205  MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4206  if (!ID)
4207  continue;
4208  if (ID->getString() == "Objective-C Image Info Version")
4209  HasObjCFlag = true;
4210  if (ID->getString() == "Objective-C Class Properties")
4211  HasClassProperties = true;
4212  // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
4213  // field was Error and now they are Max.
4214  if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
4215  if (auto *Behavior =
4216  mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4217  if (Behavior->getLimitedValue() == Module::Error) {
4218  Type *Int32Ty = Type::getInt32Ty(M.getContext());
4219  Metadata *Ops[3] = {
4221  MDString::get(M.getContext(), ID->getString()),
4222  Op->getOperand(2)};
4223  ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4224  Changed = true;
4225  }
4226  }
4227  }
4228  // Upgrade Objective-C Image Info Section. Removed the whitespace in the
4229  // section name so that llvm-lto will not complain about mismatching
4230  // module flags that is functionally the same.
4231  if (ID->getString() == "Objective-C Image Info Section") {
4232  if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
4233  SmallVector<StringRef, 4> ValueComp;
4234  Value->getString().split(ValueComp, " ");
4235  if (ValueComp.size() != 1) {
4236  std::string NewValue;
4237  for (auto &S : ValueComp)
4238  NewValue += S.str();
4239  Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
4240  MDString::get(M.getContext(), NewValue)};
4241  ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4242  Changed = true;
4243  }
4244  }
4245  }
4246 
4247  // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
4248  // If the higher bits are set, it adds new module flag for swift info.
4249  if (ID->getString() == "Objective-C Garbage Collection") {
4250  auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
4251  if (Md) {
4252