//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
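
// For instance (illustrative, not from the original file): a module declaring
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// has that declaration renamed to "llvm.x86.sse41.ptestc.old" and a fresh
// declaration taking <2 x i64> operands created in its place; the call sites
// themselves are rewritten later, in UpgradeIntrinsicCall.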

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
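
// E.g. (illustrative): the old llvm.x86.avx512.mask.cmp.ps.512 returned the
// comparison mask as a scalar integer; the upgraded declaration returns a
// <16 x i1> vector, one i1 per float lane.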

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with the LLVM version
  // that started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    // Added in 7.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                      F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn =
        Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getDeclaration(F->getParent(),
                                              StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }
      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              dyn_cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC = dyn_cast<VectorType>(F->arg_begin()->getType())
                                ->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("abs."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_abs_bf16)
        .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
                                          {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name.equals("coro.end")) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords &&
          F->getParent()->IsNewDbgInfoFormat) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("extract.", Intrinsic::vector_extract)
                             .StartsWith("insert.", Intrinsic::vector_insert)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn =
              Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embed the alignment as an attribute of
    // the pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
                                        ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                            {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll}
        Expand = Name == "i" || Name == "ll";
      else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
        Expand = true;
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load.add."))
        // nvvm.atomic.load.add.{f32.p,f64.p}
        Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
      else
        Expand = false;

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() !=
              Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
                                          Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

  case 'r': {
    if (Name.consume_front("riscv.")) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
               .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
               .Case("aes32esi", Intrinsic::riscv_aes32esi)
               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }
      break; // No other 'riscv.*' intrinsics
    }
  } break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::var_annotation,
          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
      return true;
    }
    break;
  }

  case 'w':
    if (Name.consume_front("wasm.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
              .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn =
            Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());
        return true;
      }

      if (Name.consume_front("dot.i8x16.i7x16.")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                 .Case("add.signed",
                       Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
                 .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other 'wasm.dot.i8x16.i7x16.*'.
      }
      break; // No other 'wasm.*'.
    }
    break;

  case 'x':
    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }

  auto *ST = dyn_cast<StructType>(F->getReturnType());
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
      F->getIntrinsicID() != Intrinsic::not_intrinsic) {
    // Replace return type with literal non-packed struct. Only do this for
    // intrinsics declared to return a struct, not for intrinsics with
    // overloaded return type, in which case the exact struct type will be
    // mangled into the name.
    SmallVector<Intrinsic::IITDescriptor> Desc;
    Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
    if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
      auto *FT = F->getFunctionType();
      auto *NewST = StructType::get(ST->getContext(), ST->elements());
      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
      std::string Name = F->getName().str();
      rename(F);
      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
                               Name, F->getParent());

      // The new function may also need remangling.
      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
        NewFn = *Result;
      return true;
    }
  }

  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != std::nullopt) {
    NewFn = *Result;
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
                                    bool CanUpgradeDebugIntrinsicsToRecords) {
  NewFn = nullptr;
  bool Upgraded =
      upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}
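
// Illustrative usage (a sketch, not part of the original file): module-level
// upgrade drivers typically do something like
//   Function *NewFn;
//   for (Function &F : llvm::make_early_inc_range(M))
//     if (UpgradeIntrinsicFunction(&F, NewFn))
//       UpgradeCallsToIntrinsic(&F); // rewrites call sites, removes old F
// where UpgradeCallsToIntrinsic is the companion entry point declared in
// llvm/IR/AutoUpgrade.h.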

GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getPtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
                                      Ctor->getAggregateElement(1),
                                      Constant::getNullValue(IRB.getPtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}
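
// E.g. (illustrative): an entry of the old two-field form
//   { i32 65535, ptr @ctor }
// becomes the three-field form with a null associated-data pointer appended:
//   { i32 65535, ptr @ctor, ptr null }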

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
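
// Worked example (illustrative): for a 128-bit vector and Shift == 4, the loop
// above produces indices <12, 13, 14, 15, 16, ..., 27>. Positions 0-3 select
// the zero vector (the first shuffle operand) and positions 4-15 select bytes
// 0-11 of the input, i.e. the register is shifted left by four bytes.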

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
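
// Worked example (illustrative): for a 128-bit vector and Shift == 4, the
// indices come out as <4, 5, ..., 15, 16, 17, 18, 19>: bytes 4-15 of the input
// followed by four bytes of the zero vector, i.e. a right shift by four bytes.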

static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
  llvm::VectorType *MaskTy = llvm::FixedVectorType::get(
      Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
  // i8 and we need to extract down to the right number of elements.
  if (NumElts <= 4) {
    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}
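
// E.g. (illustrative): a masked v2f64 operation passes an i8 mask here with
// NumElts == 2; the i8 is bitcast to <8 x i1> and the shuffle extracts the low
// two bits as a <2 x i1> select mask.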

static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
                            Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask,
                       cast<FixedVectorType>(Op0->getType())->getNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
                                  Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
                                      Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
}
1577
1578// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1579// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1580// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1582 Value *Op1, Value *Shift,
1583 Value *Passthru, Value *Mask,
1584 bool IsVALIGN) {
1585 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1586
1587 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1588 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1589 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1590 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1591
1592 // Mask the immediate for VALIGN.
1593 if (IsVALIGN)
1594 ShiftVal &= (NumElts - 1);
1595
1596 // If palignr is shifting the pair of vectors more than the size of two
1597 // lanes, emit zero.
1598 if (ShiftVal >= 32)
1600
1601 // If palignr is shifting the pair of input vectors more than one lane,
1602 // but less than two lanes, convert to shifting in zeroes.
1603 if (ShiftVal > 16) {
1604 ShiftVal -= 16;
1605 Op1 = Op0;
1607 }
1608
1609 int Indices[64];
1610 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1611 for (unsigned l = 0; l < NumElts; l += 16) {
1612 for (unsigned i = 0; i != 16; ++i) {
1613 unsigned Idx = ShiftVal + i;
1614 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1615 Idx += NumElts - 16; // End of lane, switch operand.
1616 Indices[l + i] = Idx + l;
1617 }
1618 }
1619
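// For example, a 128-bit palignr with ShiftVal == 4 builds
// Indices = {4..15, 16..19}: the upper 12 bytes of Op1 followed by the low
// 4 bytes of Op0, i.e. a 4-byte shift of the (Op0:Op1) concatenation.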
1620 Value *Align = Builder.CreateShuffleVector(
1621 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1622
1623 return emitX86Select(Builder, Mask, Align, Passthru);
1624}
1625
1626static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1627 bool ZeroMask, bool IndexForm) {
1628 Type *Ty = CI.getType();
1629 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1630 unsigned EltWidth = Ty->getScalarSizeInBits();
1631 bool IsFloat = Ty->isFPOrFPVectorTy();
1632 Intrinsic::ID IID;
1633 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1634 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1635 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1636 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1637 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1638 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1639 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1640 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1641 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1642 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1643 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1644 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1645 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1646 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1647 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1648 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1649 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1650 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1651 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1652 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1653 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1654 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1655 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1656 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1657 else if (VecWidth == 128 && EltWidth == 16)
1658 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1659 else if (VecWidth == 256 && EltWidth == 16)
1660 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1661 else if (VecWidth == 512 && EltWidth == 16)
1662 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1663 else if (VecWidth == 128 && EltWidth == 8)
1664 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1665 else if (VecWidth == 256 && EltWidth == 8)
1666 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1667 else if (VecWidth == 512 && EltWidth == 8)
1668 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1669 else
1670 llvm_unreachable("Unexpected intrinsic");
1671
1672 Value *Args[] = { CI.getArgOperand(0), CI.getArgOperand(1),
1673 CI.getArgOperand(2) };
1674
1675 // If this isn't the index form, we need to swap operands 0 and 1.
1676 if (!IndexForm)
1677 std::swap(Args[0], Args[1]);
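// After the swap, a t2var call ordered (idx, a, b) becomes the (a, idx, b)
// order that the vpermi2var intrinsics expect.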
1678
1679 Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1680 Args);
1681 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1682 : Builder.CreateBitCast(CI.getArgOperand(1),
1683 Ty);
1684 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1685}
1686
1687static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1688 Intrinsic::ID IID) {
1689 Type *Ty = CI.getType();
1690 Value *Op0 = CI.getOperand(0);
1691 Value *Op1 = CI.getOperand(1);
1692 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1693 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1694
1695 if (CI.arg_size() == 4) { // For masked intrinsics.
1696 Value *VecSrc = CI.getOperand(2);
1697 Value *Mask = CI.getOperand(3);
1698 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1699 }
1700 return Res;
1701}
1702
1703static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1704 bool IsRotateRight) {
1705 Type *Ty = CI.getType();
1706 Value *Src = CI.getArgOperand(0);
1707 Value *Amt = CI.getArgOperand(1);
1708
1709 // Amount may be a scalar immediate, in which case create a splat vector.
1710 // Funnel shift amounts are treated as modulo, and all types are power-of-2, so
1711 // we only care about the lowest log2 bits anyway.
1712 if (Amt->getType() != Ty) {
1713 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1714 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1715 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1716 }
1717
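// A rotate is a funnel shift with both inputs equal: fshl(x, x, amt)
// rotates left and fshr(x, x, amt) rotates right.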
1718 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1719 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1720 Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1721
1722 if (CI.arg_size() == 4) { // For masked intrinsics.
1723 Value *VecSrc = CI.getOperand(2);
1724 Value *Mask = CI.getOperand(3);
1725 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1726 }
1727 return Res;
1728}
1729
1730static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1731 bool IsSigned) {
1732 Type *Ty = CI.getType();
1733 Value *LHS = CI.getArgOperand(0);
1734 Value *RHS = CI.getArgOperand(1);
1735
1736 CmpInst::Predicate Pred;
1737 switch (Imm) {
1738 case 0x0:
1739 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1740 break;
1741 case 0x1:
1742 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1743 break;
1744 case 0x2:
1745 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1746 break;
1747 case 0x3:
1748 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1749 break;
1750 case 0x4:
1751 Pred = ICmpInst::ICMP_EQ;
1752 break;
1753 case 0x5:
1754 Pred = ICmpInst::ICMP_NE;
1755 break;
1756 case 0x6:
1757 return Constant::getNullValue(Ty); // FALSE
1758 case 0x7:
1759 return Constant::getAllOnesValue(Ty); // TRUE
1760 default:
1761 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1762 }
1763
1764 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1765 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1766 return Ext;
1767}
1768
1769static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1770 bool IsShiftRight, bool ZeroMask) {
1771 Type *Ty = CI.getType();
1772 Value *Op0 = CI.getArgOperand(0);
1773 Value *Op1 = CI.getArgOperand(1);
1774 Value *Amt = CI.getArgOperand(2);
1775
1776 if (IsShiftRight)
1777 std::swap(Op0, Op1);
1778
1779 // Amount may be a scalar immediate, in which case create a splat vector.
1780 // Funnel shift amounts are treated as modulo, and all types are power-of-2, so
1781 // we only care about the lowest log2 bits anyway.
1782 if (Amt->getType() != Ty) {
1783 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1784 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1785 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1786 }
1787
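// vpshld(a, b, imm) corresponds to fshl(a, b, imm); for vpshrd the
// concatenation order is reversed, which is why Op0/Op1 were swapped above
// for the right-shift form.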
1788 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1789 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1790 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1791
1792 unsigned NumArgs = CI.arg_size();
1793 if (NumArgs >= 4) { // For masked intrinsics.
1794 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1795 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1796 CI.getArgOperand(0);
1797 Value *Mask = CI.getOperand(NumArgs - 1);
1798 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1799 }
1800 return Res;
1801}
1802
1803static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
1804 Value *Mask, bool Aligned) {
1805 // Cast the pointer to the right type.
1806 Ptr = Builder.CreateBitCast(Ptr,
1807 llvm::PointerType::getUnqual(Data->getType()));
1808 const Align Alignment =
1809 Aligned
1810 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1811 : Align(1);
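// The aligned forms assume natural vector alignment, e.g. Align(64) for a
// 512-bit store.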
1812
1813 // If the mask is all ones, just emit a regular store.
1814 if (const auto *C = dyn_cast<Constant>(Mask))
1815 if (C->isAllOnesValue())
1816 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1817
1818 // Convert the mask from an integer type to a vector of i1.
1819 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1820 Mask = getX86MaskVec(Builder, Mask, NumElts);
1821 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1822}
1823
1824static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
1825 Value *Passthru, Value *Mask, bool Aligned) {
1826 Type *ValTy = Passthru->getType();
1827 // Cast the pointer to the right type.
1828 Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1829 const Align Alignment =
1830 Aligned
1831 ? Align(
1832 ValTy->getPrimitiveSizeInBits().getFixedValue() /
1833 8)
1834 : Align(1);
1835
1836 // If the mask is all ones, just emit a regular load.
1837 if (const auto *C = dyn_cast<Constant>(Mask))
1838 if (C->isAllOnesValue())
1839 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1840
1841 // Convert the mask from an integer type to a vector of i1.
1842 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1843 Mask = getX86MaskVec(Builder, Mask, NumElts);
1844 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1845}
1846
1847static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1848 Type *Ty = CI.getType();
1849 Value *Op0 = CI.getArgOperand(0);
1850 Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1851 Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1852 if (CI.arg_size() == 3)
1853 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1854 return Res;
1855}
1856
1857static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1858 Type *Ty = CI.getType();
1859
1860 // Arguments have a vXi32 type so cast to vXi64.
1861 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1862 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1863
1864 if (IsSigned) {
1865 // Shift left then arithmetic shift right.
1866 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1867 LHS = Builder.CreateShl(LHS, ShiftAmt);
1868 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1869 RHS = Builder.CreateShl(RHS, ShiftAmt);
1870 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1871 } else {
1872 // Clear the upper bits.
1873 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1874 LHS = Builder.CreateAnd(LHS, Mask);
1875 RHS = Builder.CreateAnd(RHS, Mask);
1876 }
1877
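// Each product of the (sign- or zero-extended) low 32 bits fits in 64 bits,
// so a plain i64 multiply matches pmuldq/pmuludq semantics.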
1878 Value *Res = Builder.CreateMul(LHS, RHS);
1879
1880 if (CI.arg_size() == 4)
1881 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1882
1883 return Res;
1884}
1885
1886// Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
1887static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1888 Value *Mask) {
1889 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1890 if (Mask) {
1891 const auto *C = dyn_cast<Constant>(Mask);
1892 if (!C || !C->isAllOnesValue())
1893 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1894 }
1895
1896 if (NumElts < 8) {
1897 int Indices[8];
1898 for (unsigned i = 0; i != NumElts; ++i)
1899 Indices[i] = i;
1900 for (unsigned i = NumElts; i != 8; ++i)
1901 Indices[i] = NumElts + i % NumElts;
1902 Vec = Builder.CreateShuffleVector(Vec,
1903 Constant::getNullValue(Vec->getType()),
1904 Indices);
1905 }
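// The shuffle above pads the narrow result with zero bits (taken from the
// null vector) so the bitcast below produces a full i8.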
1906 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1907}
1908
1909static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1910 unsigned CC, bool Signed) {
1911 Value *Op0 = CI.getArgOperand(0);
1912 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1913
1914 Value *Cmp;
1915 if (CC == 3) {
1916 Cmp = Constant::getNullValue(
1917 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1918 } else if (CC == 7) {
1919 Cmp = Constant::getAllOnesValue(
1920 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1921 } else {
1922 ICmpInst::Predicate Pred;
1923 switch (CC) {
1924 default: llvm_unreachable("Unknown condition code");
1925 case 0: Pred = ICmpInst::ICMP_EQ; break;
1926 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1927 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1928 case 4: Pred = ICmpInst::ICMP_NE; break;
1929 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1930 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1931 }
1932 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1933 }
1934
1935 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1936
1937 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1938}
1939
1940// Replace a masked intrinsic with an older unmasked intrinsic.
1941static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
1942 Intrinsic::ID IID) {
1943 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1944 Value *Rep = Builder.CreateCall(Intrin,
1945 { CI.getArgOperand(0), CI.getArgOperand(1) });
1946 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1947}
1948
1949static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
1950 Value* A = CI.getArgOperand(0);
1951 Value* B = CI.getArgOperand(1);
1952 Value* Src = CI.getArgOperand(2);
1953 Value* Mask = CI.getArgOperand(3);
1954
1955 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1956 Value* Cmp = Builder.CreateIsNotNull(AndNode);
1957 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1958 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1959 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1960 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1961}
1962
1963static Value* upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
1964 Value* Op = CI.getArgOperand(0);
1965 Type* ReturnOp = CI.getType();
1966 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1967 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1968 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1969}
1970
1971// Replace intrinsic with unmasked version and a select.
1972static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1973 CallBase &CI, Value *&Rep) {
1974 Name = Name.substr(12); // Remove avx512.mask.
1975
1976 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1977 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1978 Intrinsic::ID IID;
1979 if (Name.starts_with("max.p")) {
1980 if (VecWidth == 128 && EltWidth == 32)
1981 IID = Intrinsic::x86_sse_max_ps;
1982 else if (VecWidth == 128 && EltWidth == 64)
1983 IID = Intrinsic::x86_sse2_max_pd;
1984 else if (VecWidth == 256 && EltWidth == 32)
1985 IID = Intrinsic::x86_avx_max_ps_256;
1986 else if (VecWidth == 256 && EltWidth == 64)
1987 IID = Intrinsic::x86_avx_max_pd_256;
1988 else
1989 llvm_unreachable("Unexpected intrinsic");
1990 } else if (Name.starts_with("min.p")) {
1991 if (VecWidth == 128 && EltWidth == 32)
1992 IID = Intrinsic::x86_sse_min_ps;
1993 else if (VecWidth == 128 && EltWidth == 64)
1994 IID = Intrinsic::x86_sse2_min_pd;
1995 else if (VecWidth == 256 && EltWidth == 32)
1996 IID = Intrinsic::x86_avx_min_ps_256;
1997 else if (VecWidth == 256 && EltWidth == 64)
1998 IID = Intrinsic::x86_avx_min_pd_256;
1999 else
2000 llvm_unreachable("Unexpected intrinsic");
2001 } else if (Name.starts_with("pshuf.b.")) {
2002 if (VecWidth == 128)
2003 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2004 else if (VecWidth == 256)
2005 IID = Intrinsic::x86_avx2_pshuf_b;
2006 else if (VecWidth == 512)
2007 IID = Intrinsic::x86_avx512_pshuf_b_512;
2008 else
2009 llvm_unreachable("Unexpected intrinsic");
2010 } else if (Name.starts_with("pmul.hr.sw.")) {
2011 if (VecWidth == 128)
2012 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2013 else if (VecWidth == 256)
2014 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2015 else if (VecWidth == 512)
2016 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2017 else
2018 llvm_unreachable("Unexpected intrinsic");
2019 } else if (Name.starts_with("pmulh.w.")) {
2020 if (VecWidth == 128)
2021 IID = Intrinsic::x86_sse2_pmulh_w;
2022 else if (VecWidth == 256)
2023 IID = Intrinsic::x86_avx2_pmulh_w;
2024 else if (VecWidth == 512)
2025 IID = Intrinsic::x86_avx512_pmulh_w_512;
2026 else
2027 llvm_unreachable("Unexpected intrinsic");
2028 } else if (Name.starts_with("pmulhu.w.")) {
2029 if (VecWidth == 128)
2030 IID = Intrinsic::x86_sse2_pmulhu_w;
2031 else if (VecWidth == 256)
2032 IID = Intrinsic::x86_avx2_pmulhu_w;
2033 else if (VecWidth == 512)
2034 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2035 else
2036 llvm_unreachable("Unexpected intrinsic");
2037 } else if (Name.starts_with("pmaddw.d.")) {
2038 if (VecWidth == 128)
2039 IID = Intrinsic::x86_sse2_pmadd_wd;
2040 else if (VecWidth == 256)
2041 IID = Intrinsic::x86_avx2_pmadd_wd;
2042 else if (VecWidth == 512)
2043 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2044 else
2045 llvm_unreachable("Unexpected intrinsic");
2046 } else if (Name.starts_with("pmaddubs.w.")) {
2047 if (VecWidth == 128)
2048 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2049 else if (VecWidth == 256)
2050 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2051 else if (VecWidth == 512)
2052 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2053 else
2054 llvm_unreachable("Unexpected intrinsic");
2055 } else if (Name.starts_with("packsswb.")) {
2056 if (VecWidth == 128)
2057 IID = Intrinsic::x86_sse2_packsswb_128;
2058 else if (VecWidth == 256)
2059 IID = Intrinsic::x86_avx2_packsswb;
2060 else if (VecWidth == 512)
2061 IID = Intrinsic::x86_avx512_packsswb_512;
2062 else
2063 llvm_unreachable("Unexpected intrinsic");
2064 } else if (Name.starts_with("packssdw.")) {
2065 if (VecWidth == 128)
2066 IID = Intrinsic::x86_sse2_packssdw_128;
2067 else if (VecWidth == 256)
2068 IID = Intrinsic::x86_avx2_packssdw;
2069 else if (VecWidth == 512)
2070 IID = Intrinsic::x86_avx512_packssdw_512;
2071 else
2072 llvm_unreachable("Unexpected intrinsic");
2073 } else if (Name.starts_with("packuswb.")) {
2074 if (VecWidth == 128)
2075 IID = Intrinsic::x86_sse2_packuswb_128;
2076 else if (VecWidth == 256)
2077 IID = Intrinsic::x86_avx2_packuswb;
2078 else if (VecWidth == 512)
2079 IID = Intrinsic::x86_avx512_packuswb_512;
2080 else
2081 llvm_unreachable("Unexpected intrinsic");
2082 } else if (Name.starts_with("packusdw.")) {
2083 if (VecWidth == 128)
2084 IID = Intrinsic::x86_sse41_packusdw;
2085 else if (VecWidth == 256)
2086 IID = Intrinsic::x86_avx2_packusdw;
2087 else if (VecWidth == 512)
2088 IID = Intrinsic::x86_avx512_packusdw_512;
2089 else
2090 llvm_unreachable("Unexpected intrinsic");
2091 } else if (Name.starts_with("vpermilvar.")) {
2092 if (VecWidth == 128 && EltWidth == 32)
2093 IID = Intrinsic::x86_avx_vpermilvar_ps;
2094 else if (VecWidth == 128 && EltWidth == 64)
2095 IID = Intrinsic::x86_avx_vpermilvar_pd;
2096 else if (VecWidth == 256 && EltWidth == 32)
2097 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2098 else if (VecWidth == 256 && EltWidth == 64)
2099 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2100 else if (VecWidth == 512 && EltWidth == 32)
2101 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2102 else if (VecWidth == 512 && EltWidth == 64)
2103 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2104 else
2105 llvm_unreachable("Unexpected intrinsic");
2106 } else if (Name == "cvtpd2dq.256") {
2107 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2108 } else if (Name == "cvtpd2ps.256") {
2109 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2110 } else if (Name == "cvttpd2dq.256") {
2111 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2112 } else if (Name == "cvttps2dq.128") {
2113 IID = Intrinsic::x86_sse2_cvttps2dq;
2114 } else if (Name == "cvttps2dq.256") {
2115 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2116 } else if (Name.starts_with("permvar.")) {
2117 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2118 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2119 IID = Intrinsic::x86_avx2_permps;
2120 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2121 IID = Intrinsic::x86_avx2_permd;
2122 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2123 IID = Intrinsic::x86_avx512_permvar_df_256;
2124 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2125 IID = Intrinsic::x86_avx512_permvar_di_256;
2126 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2127 IID = Intrinsic::x86_avx512_permvar_sf_512;
2128 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2129 IID = Intrinsic::x86_avx512_permvar_si_512;
2130 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2131 IID = Intrinsic::x86_avx512_permvar_df_512;
2132 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2133 IID = Intrinsic::x86_avx512_permvar_di_512;
2134 else if (VecWidth == 128 && EltWidth == 16)
2135 IID = Intrinsic::x86_avx512_permvar_hi_128;
2136 else if (VecWidth == 256 && EltWidth == 16)
2137 IID = Intrinsic::x86_avx512_permvar_hi_256;
2138 else if (VecWidth == 512 && EltWidth == 16)
2139 IID = Intrinsic::x86_avx512_permvar_hi_512;
2140 else if (VecWidth == 128 && EltWidth == 8)
2141 IID = Intrinsic::x86_avx512_permvar_qi_128;
2142 else if (VecWidth == 256 && EltWidth == 8)
2143 IID = Intrinsic::x86_avx512_permvar_qi_256;
2144 else if (VecWidth == 512 && EltWidth == 8)
2145 IID = Intrinsic::x86_avx512_permvar_qi_512;
2146 else
2147 llvm_unreachable("Unexpected intrinsic");
2148 } else if (Name.starts_with("dbpsadbw.")) {
2149 if (VecWidth == 128)
2150 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2151 else if (VecWidth == 256)
2152 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2153 else if (VecWidth == 512)
2154 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2155 else
2156 llvm_unreachable("Unexpected intrinsic");
2157 } else if (Name.starts_with("pmultishift.qb.")) {
2158 if (VecWidth == 128)
2159 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2160 else if (VecWidth == 256)
2161 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2162 else if (VecWidth == 512)
2163 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2164 else
2165 llvm_unreachable("Unexpected intrinsic");
2166 } else if (Name.starts_with("conflict.")) {
2167 if (Name[9] == 'd' && VecWidth == 128)
2168 IID = Intrinsic::x86_avx512_conflict_d_128;
2169 else if (Name[9] == 'd' && VecWidth == 256)
2170 IID = Intrinsic::x86_avx512_conflict_d_256;
2171 else if (Name[9] == 'd' && VecWidth == 512)
2172 IID = Intrinsic::x86_avx512_conflict_d_512;
2173 else if (Name[9] == 'q' && VecWidth == 128)
2174 IID = Intrinsic::x86_avx512_conflict_q_128;
2175 else if (Name[9] == 'q' && VecWidth == 256)
2176 IID = Intrinsic::x86_avx512_conflict_q_256;
2177 else if (Name[9] == 'q' && VecWidth == 512)
2178 IID = Intrinsic::x86_avx512_conflict_q_512;
2179 else
2180 llvm_unreachable("Unexpected intrinsic");
2181 } else if (Name.starts_with("pavg.")) {
2182 if (Name[5] == 'b' && VecWidth == 128)
2183 IID = Intrinsic::x86_sse2_pavg_b;
2184 else if (Name[5] == 'b' && VecWidth == 256)
2185 IID = Intrinsic::x86_avx2_pavg_b;
2186 else if (Name[5] == 'b' && VecWidth == 512)
2187 IID = Intrinsic::x86_avx512_pavg_b_512;
2188 else if (Name[5] == 'w' && VecWidth == 128)
2189 IID = Intrinsic::x86_sse2_pavg_w;
2190 else if (Name[5] == 'w' && VecWidth == 256)
2191 IID = Intrinsic::x86_avx2_pavg_w;
2192 else if (Name[5] == 'w' && VecWidth == 512)
2193 IID = Intrinsic::x86_avx512_pavg_w_512;
2194 else
2195 llvm_unreachable("Unexpected intrinsic");
2196 } else
2197 return false;
2198
2199 SmallVector<Value *, 4> Args(CI.args());
2200 Args.pop_back();
2201 Args.pop_back();
2202 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
2203 Args);
2204 unsigned NumArgs = CI.arg_size();
2205 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2206 CI.getArgOperand(NumArgs - 2));
2207 return true;
2208}
2209
2210/// Upgrade the comment in a call to inline asm that represents an ObjC
2211/// retain/release marker.
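/// The '#' that introduces the marker comment is rewritten to ';'.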
2212void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2213 size_t Pos;
2214 if (AsmStr->find("mov\tfp") == 0 &&
2215 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2216 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2217 AsmStr->replace(Pos, 1, ";");
2218 }
2219}
2220
2221static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2222 IRBuilder<> &Builder) {
2223 if (Name == "mve.vctp64.old") {
2224 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
2225 // correct type.
2226 Value *VCTP = Builder.CreateCall(
2227 Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
2228 CI->getArgOperand(0), CI->getName());
2229 Value *C1 = Builder.CreateCall(
2230 Intrinsic::getDeclaration(
2231 F->getParent(), Intrinsic::arm_mve_pred_v2i,
2232 {VectorType::get(Builder.getInt1Ty(), 2, false)}),
2233 VCTP);
2234 return Builder.CreateCall(
2235 Intrinsic::getDeclaration(
2236 F->getParent(), Intrinsic::arm_mve_pred_i2v,
2237 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2238 C1);
2239 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
2240 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
2241 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
2242 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
2243 Name ==
2244 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
2245 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
2246 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
2247 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
2248 Name ==
2249 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
2250 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
2251 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
2252 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
2253 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
2254 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
2255 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
2256 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
2257 std::vector<Type *> Tys;
2258 unsigned ID = CI->getIntrinsicID();
2259 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
2260 switch (ID) {
2261 case Intrinsic::arm_mve_mull_int_predicated:
2262 case Intrinsic::arm_mve_vqdmull_predicated:
2263 case Intrinsic::arm_mve_vldr_gather_base_predicated:
2264 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
2265 break;
2266 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
2267 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
2268 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
2269 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
2270 V2I1Ty};
2271 break;
2272 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
2273 Tys = {CI->getType(), CI->getOperand(0)->getType(),
2274 CI->getOperand(1)->getType(), V2I1Ty};
2275 break;
2276 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
2277 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
2278 CI->getOperand(2)->getType(), V2I1Ty};
2279 break;
2280 case Intrinsic::arm_cde_vcx1q_predicated:
2281 case Intrinsic::arm_cde_vcx1qa_predicated:
2282 case Intrinsic::arm_cde_vcx2q_predicated:
2283 case Intrinsic::arm_cde_vcx2qa_predicated:
2284 case Intrinsic::arm_cde_vcx3q_predicated:
2285 case Intrinsic::arm_cde_vcx3qa_predicated:
2286 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
2287 break;
2288 default:
2289 llvm_unreachable("Unhandled Intrinsic!");
2290 }
2291
2292 std::vector<Value *> Ops;
2293 for (Value *Op : CI->args()) {
2294 Type *Ty = Op->getType();
2295 if (Ty->getScalarSizeInBits() == 1) {
2296 Value *C1 = Builder.CreateCall(
2297 Intrinsic::getDeclaration(
2298 F->getParent(), Intrinsic::arm_mve_pred_v2i,
2299 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2300 Op);
2301 Op = Builder.CreateCall(
2302 Intrinsic::getDeclaration(F->getParent(),
2303 Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
2304 C1);
2305 }
2306 Ops.push_back(Op);
2307 }
2308
2309 Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
2310 return Builder.CreateCall(Fn, Ops, CI->getName());
2311 }
2312 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
2313}
2314
2315static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
2316 Function *F, IRBuilder<> &Builder) {
2317 const bool IsInc = Name.starts_with("atomic.inc.");
2318 if (IsInc || Name.starts_with("atomic.dec.")) {
2319 if (CI->getNumOperands() != 6) // Malformed bitcode.
2320 return nullptr;
2321
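// amdgcn.atomic.inc/dec performed wrapping increments/decrements, which map
// onto the uinc_wrap/udec_wrap atomicrmw operations.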
2322 AtomicRMWInst::BinOp RMWOp =
2323 IsInc ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
2324
2325 Value *Ptr = CI->getArgOperand(0);
2326 Value *Val = CI->getArgOperand(1);
2327 ConstantInt *OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
2328 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
2329
2330 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
2331 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
2332 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
2333 if (Order == AtomicOrdering::NotAtomic ||
2334 Order == AtomicOrdering::Unordered)
2335 Order = AtomicOrdering::SequentiallyConsistent;
2336
2337 // The scope argument never really worked correctly. Use agent as the most
2338 // conservative option which should still always produce the instruction.
2339 SyncScope::ID SSID = F->getContext().getOrInsertSyncScopeID("agent");
2340 AtomicRMWInst *RMW =
2341 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
2342
2343 if (!VolatileArg || !VolatileArg->isZero())
2344 RMW->setVolatile(true);
2345 return RMW;
2346 }
2347
2348 llvm_unreachable("Unknown function for AMDGPU intrinsic upgrade.");
2349}
2350
2351/// Helper to unwrap intrinsic call MetadataAsValue operands.
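/// For example, unwrapMAVOp<DIExpression>(CI, 2) returns the expression
/// operand of a three-argument llvm.dbg.value call.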
2352template <typename MDType>
2353static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
2354 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
2355 return dyn_cast<MDType>(MAV->getMetadata());
2356 return nullptr;
2357}
2358
2359/// Convert debug intrinsic calls to non-instruction debug records.
2360/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
2361/// \p CI - The debug intrinsic call.
2362static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
2363 DbgRecord *DR = nullptr;
2364 if (Name == "label") {
2365 DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
2366 } else if (Name == "assign") {
2367 DR = new DbgVariableRecord(
2368 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
2369 unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
2370 unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
2371 CI->getDebugLoc());
2372 } else if (Name == "declare") {
2373 DR = new DbgVariableRecord(
2374 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
2375 unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
2376 DbgVariableRecord::LocationType::Declare);
2377 } else if (Name == "addr") {
2378 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
2379 DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
2380 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
2381 DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
2382 unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
2383 CI->getDebugLoc());
2384 } else if (Name == "value") {
2385 // An old version of dbg.value had an extra offset argument.
2386 unsigned VarOp = 1;
2387 unsigned ExprOp = 2;
2388 if (CI->arg_size() == 4) {
2389 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
2390 // Nonzero offset dbg.values get dropped without a replacement.
2391 if (!Offset || !Offset->isZeroValue())
2392 return;
2393 VarOp = 2;
2394 ExprOp = 3;
2395 }
2396 DR = new DbgVariableRecord(
2397 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
2398 unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
2399 }
2400 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
2401 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
2402}
2403
2404/// Upgrade a call to an old intrinsic. All argument and return casting must be
2405/// provided to seamlessly integrate with existing context.
2406void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
2407 // Note that dyn_cast to Function is not quite the same as getCalledFunction,
2408 // which checks that the callee's function type matches. It's likely we need
2409 // to handle type changes here.
2410 Function *F = dyn_cast<Function>(CI->getCalledOperand());
2411 if (!F)
2412 return;
2413
2414 LLVMContext &C = CI->getContext();
2415 IRBuilder<> Builder(C);
2416 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
2417
2418 if (!NewFn) {
2419 bool FallthroughToDefaultUpgrade = false;
2420 // Get the Function's name.
2421 StringRef Name = F->getName();
2422
2423 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
2424 Name = Name.substr(5);
2425
2426 bool IsX86 = Name.consume_front("x86.");
2427 bool IsNVVM = Name.consume_front("nvvm.");
2428 bool IsARM = Name.consume_front("arm.");
2429 bool IsAMDGCN = Name.consume_front("amdgcn.");
2430 bool IsDbg = Name.consume_front("dbg.");
2431
2432 if (IsX86 && Name.starts_with("sse4a.movnt.")) {
2433 SmallVector<Metadata *, 1> Elts;
2434 Elts.push_back(
2435 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2436 MDNode *Node = MDNode::get(C, Elts);
2437
2438 Value *Arg0 = CI->getArgOperand(0);
2439 Value *Arg1 = CI->getArgOperand(1);
2440
2441 // Nontemporal (unaligned) store of the 0th element of the float/double
2442 // vector.
2443 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2444 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2445 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2446 Value *Extract =
2447 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2448
2449 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2450 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2451
2452 // Remove intrinsic.
2453 CI->eraseFromParent();
2454 return;
2455 }
2456
2457 if (IsX86 && (Name.starts_with("avx.movnt.") ||
2458 Name.starts_with("avx512.storent."))) {
2459 SmallVector<Metadata *, 1> Elts;
2460 Elts.push_back(
2461 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2462 MDNode *Node = MDNode::get(C, Elts);
2463
2464 Value *Arg0 = CI->getArgOperand(0);
2465 Value *Arg1 = CI->getArgOperand(1);
2466
2467 // Convert the type of the pointer to a pointer to the stored type.
2468 Value *BC = Builder.CreateBitCast(Arg0,
2469 PointerType::getUnqual(Arg1->getType()),
2470 "cast");
2471 StoreInst *SI = Builder.CreateAlignedStore(
2472 Arg1, BC,
2473 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2474 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2475
2476 // Remove intrinsic.
2477 CI->eraseFromParent();
2478 return;
2479 }
2480
2481 if (IsX86 && Name == "sse2.storel.dq") {
2482 Value *Arg0 = CI->getArgOperand(0);
2483 Value *Arg1 = CI->getArgOperand(1);
2484
2485 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2486 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2487 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2488 Value *BC = Builder.CreateBitCast(Arg0,
2489 PointerType::getUnqual(Elt->getType()),
2490 "cast");
2491 Builder.CreateAlignedStore(Elt, BC, Align(1));
2492
2493 // Remove intrinsic.
2494 CI->eraseFromParent();
2495 return;
2496 }
2497
2498 if (IsX86 && (Name.starts_with("sse.storeu.") ||
2499 Name.starts_with("sse2.storeu.") ||
2500 Name.starts_with("avx.storeu."))) {
2501 Value *Arg0 = CI->getArgOperand(0);
2502 Value *Arg1 = CI->getArgOperand(1);
2503
2504 Arg0 = Builder.CreateBitCast(Arg0,
2505 PointerType::getUnqual(Arg1->getType()),
2506 "cast");
2507 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2508
2509 // Remove intrinsic.
2510 CI->eraseFromParent();
2511 return;
2512 }
2513
2514 if (IsX86 && Name == "avx512.mask.store.ss") {
2515 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2516 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2517 Mask, false);
2518
2519 // Remove intrinsic.
2520 CI->eraseFromParent();
2521 return;
2522 }
2523
2524 if (IsX86 && Name.starts_with("avx512.mask.store")) {
2525 // "avx512.mask.storeu." or "avx512.mask.store."
2526 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2527 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2528 CI->getArgOperand(2), Aligned);
2529
2530 // Remove intrinsic.
2531 CI->eraseFromParent();
2532 return;
2533 }
2534
2535 Value *Rep = nullptr;
2536 // Upgrade packed integer vector compare intrinsics to compare instructions.
2537 if (IsX86 && (Name.starts_with("sse2.pcmp") ||
2538 Name.starts_with("avx2.pcmp"))) {
2539 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2540 bool CmpEq = Name[9] == 'e';
2541 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2542 CI->getArgOperand(0), CI->getArgOperand(1));
2543 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2544 } else if (IsX86 && (Name.starts_with("avx512.broadcastm"))) {
2545 Type *ExtTy = Type::getInt32Ty(C);
2546 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2547 ExtTy = Type::getInt64Ty(C);
2548 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2549 ExtTy->getPrimitiveSizeInBits();
2550 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2551 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2552 } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2553 Name == "sse2.sqrt.sd")) {
2554 Value *Vec = CI->getArgOperand(0);
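// sqrt.ss/sqrt.sd only square-root element 0; the upper lanes pass through
// unchanged from the source vector.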
2555 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2556 Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2557 Intrinsic::sqrt, Elt0->getType());
2558 Elt0 = Builder.CreateCall(Intr, Elt0);
2559 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2560 } else if (IsX86 && (Name.starts_with("avx.sqrt.p") ||
2561 Name.starts_with("sse2.sqrt.p") ||
2562 Name.starts_with("sse.sqrt.p"))) {
2563 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2564 Intrinsic::sqrt,
2565 CI->getType()),
2566 {CI->getArgOperand(0)});
2567 } else if (IsX86 && (Name.starts_with("avx512.mask.sqrt.p"))) {
2568 if (CI->arg_size() == 4 &&
2569 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2570 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2571 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2572 : Intrinsic::x86_avx512_sqrt_pd_512;
2573
2574 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
2575 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2576 IID), Args);
2577 } else {
2578 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2579 Intrinsic::sqrt,
2580 CI->getType()),
2581 {CI->getArgOperand(0)});
2582 }
2583 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2584 CI->getArgOperand(1));
2585 } else if (IsX86 && (Name.starts_with("avx512.ptestm") ||
2586 Name.starts_with("avx512.ptestnm"))) {
2587 Value *Op0 = CI->getArgOperand(0);
2588 Value *Op1 = CI->getArgOperand(1);
2589 Value *Mask = CI->getArgOperand(2);
2590 Rep = Builder.CreateAnd(Op0, Op1);
2591 llvm::Type *Ty = Op0->getType();
2592 Value *Zero = llvm::Constant::getNullValue(Ty);
2593 ICmpInst::Predicate Pred =
2594 Name.starts_with("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2595 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2596 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2597 } else if (IsX86 && (Name.starts_with("avx512.mask.pbroadcast"))){
2598 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2599 ->getNumElements();
2600 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2601 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2602 CI->getArgOperand(1));
2603 } else if (IsX86 && (Name.starts_with("avx512.kunpck"))) {
2604 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2605 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2606 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2607 int Indices[64];
2608 for (unsigned i = 0; i != NumElts; ++i)
2609 Indices[i] = i;
2610
2611 // First extract half of each vector. This gives better codegen than
2612 // doing it in a single shuffle.
2613 LHS =
2614 Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2615 RHS =
2616 Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2617 // Concat the vectors.
2618 // NOTE: Operands have to be swapped to match intrinsic definition.
2619 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2620 Rep = Builder.CreateBitCast(Rep, CI->getType());
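// For example, kunpck.bw(a, b) yields a 16-bit mask whose low 8 bits come
// from b and whose high 8 bits come from a.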
2621 } else if (IsX86 && Name == "avx512.kand.w") {
2622 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2623 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2624 Rep = Builder.CreateAnd(LHS, RHS);
2625 Rep = Builder.CreateBitCast(Rep, CI->getType());
2626 } else if (IsX86 && Name == "avx512.kandn.w") {
2627 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2628 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2629 LHS = Builder.CreateNot(LHS);
2630 Rep = Builder.CreateAnd(LHS, RHS);
2631 Rep = Builder.CreateBitCast(Rep, CI->getType());
2632 } else if (IsX86 && Name == "avx512.kor.w") {
2633 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2634 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2635 Rep = Builder.CreateOr(LHS, RHS);
2636 Rep = Builder.CreateBitCast(Rep, CI->getType());
2637 } else if (IsX86 && Name == "avx512.kxor.w") {
2638 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2639 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2640 Rep = Builder.CreateXor(LHS, RHS);
2641 Rep = Builder.CreateBitCast(Rep, CI->getType());
2642 } else if (IsX86 && Name == "avx512.kxnor.w") {
2643 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2644 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2645 LHS = Builder.CreateNot(LHS);
2646 Rep = Builder.CreateXor(LHS, RHS);
2647 Rep = Builder.CreateBitCast(Rep, CI->getType());
2648 } else if (IsX86 && Name == "avx512.knot.w") {
2649 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2650 Rep = Builder.CreateNot(Rep);
2651 Rep = Builder.CreateBitCast(Rep, CI->getType());
2652 } else if (IsX86 &&
2653 (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2654 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2655 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2656 Rep = Builder.CreateOr(LHS, RHS);
2657 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2658 Value *C;
2659 if (Name[14] == 'c')
2660 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2661 else
2662 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2663 Rep = Builder.CreateICmpEQ(Rep, C);
2664 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2665 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2666 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2667 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2668 Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2669 Type *I32Ty = Type::getInt32Ty(C);
2670 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2671 ConstantInt::get(I32Ty, 0));
2672 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2673 ConstantInt::get(I32Ty, 0));
2674 Value *EltOp;
2675 if (Name.contains(".add."))
2676 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2677 else if (Name.contains(".sub."))
2678 EltOp = Builder.CreateFSub(Elt0, Elt1);
2679 else if (Name.contains(".mul."))
2680 EltOp = Builder.CreateFMul(Elt0, Elt1);
2681 else
2682 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2683 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2684 ConstantInt::get(I32Ty, 0));
2685 } else if (IsX86 && Name.starts_with("avx512.mask.pcmp")) {
2686 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2687 bool CmpEq = Name[16] == 'e';
2688 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2689 } else if (IsX86 && Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2690 Type *OpTy = CI->getArgOperand(0)->getType();
2691 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2692 Intrinsic::ID IID;
2693 switch (VecWidth) {
2694 default: llvm_unreachable("Unexpected intrinsic");
2695 case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2696 case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2697 case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2698 }
2699
2700 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2701 { CI->getOperand(0), CI->getArgOperand(1) });
2702 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2703 } else if (IsX86 && Name.starts_with("avx512.mask.fpclass.p")) {
2704 Type *OpTy = CI->getArgOperand(0)->getType();
2705 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2706 unsigned EltWidth = OpTy->getScalarSizeInBits();
2707 Intrinsic::ID IID;
2708 if (VecWidth == 128 && EltWidth == 32)
2709 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2710 else if (VecWidth == 256 && EltWidth == 32)
2711 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2712 else if (VecWidth == 512 && EltWidth == 32)
2713 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2714 else if (VecWidth == 128 && EltWidth == 64)
2715 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2716 else if (VecWidth == 256 && EltWidth == 64)
2717 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2718 else if (VecWidth == 512 && EltWidth == 64)
2719 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2720 else
2721 llvm_unreachable("Unexpected intrinsic");
2722
2723 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2724 { CI->getOperand(0), CI->getArgOperand(1) });
2725 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2726 } else if (IsX86 && Name.starts_with("avx512.cmp.p")) {
2727 SmallVector<Value *, 4> Args(CI->args());
2728 Type *OpTy = Args[0]->getType();
2729 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2730 unsigned EltWidth = OpTy->getScalarSizeInBits();
2731 Intrinsic::ID IID;
2732 if (VecWidth == 128 && EltWidth == 32)
2733 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2734 else if (VecWidth == 256 && EltWidth == 32)
2735 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2736 else if (VecWidth == 512 && EltWidth == 32)
2737 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2738 else if (VecWidth == 128 && EltWidth == 64)
2739 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2740 else if (VecWidth == 256 && EltWidth == 64)
2741 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2742 else if (VecWidth == 512 && EltWidth == 64)
2743 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2744 else
2745 llvm_unreachable("Unexpected intrinsic");
2746
2747 Value *Mask = CI->getArgOperand(3);
2748 if (VecWidth == 512)
2749 std::swap(Mask, Args.back());
2750 Args.push_back(Mask);
2751
2752 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2753 Args);
2754 } else if (IsX86 && Name.starts_with("avx512.mask.cmp.")) {
2755 // Integer compare intrinsics.
2756 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2757 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2758 } else if (IsX86 && Name.starts_with("avx512.mask.ucmp.")) {
2759 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2760 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2761 } else if (IsX86 && (Name.starts_with("avx512.cvtb2mask.") ||
2762 Name.starts_with("avx512.cvtw2mask.") ||
2763 Name.starts_with("avx512.cvtd2mask.") ||
2764 Name.starts_with("avx512.cvtq2mask."))) {
2765 Value *Op = CI->getArgOperand(0);
2766 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2767 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2768 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2769 } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
2770 Name == "ssse3.pabs.w.128" ||
2771 Name == "ssse3.pabs.d.128" ||
2772 Name.starts_with("avx2.pabs") ||
2773 Name.starts_with("avx512.mask.pabs"))) {
2774 Rep = upgradeAbs(Builder, *CI);
2775 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2776 Name == "sse2.pmaxs.w" ||
2777 Name == "sse41.pmaxsd" ||
2778 Name.starts_with("avx2.pmaxs") ||
2779 Name.starts_with("avx512.mask.pmaxs"))) {
2780 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2781 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2782 Name == "sse41.pmaxuw" ||
2783 Name == "sse41.pmaxud" ||
2784 Name.starts_with("avx2.pmaxu") ||
2785 Name.starts_with("avx512.mask.pmaxu"))) {
2786 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2787 } else if (IsX86 && (Name == "sse41.pminsb" ||
2788 Name == "sse2.pmins.w" ||
2789 Name == "sse41.pminsd" ||
2790 Name.starts_with("avx2.pmins") ||
2791 Name.starts_with("avx512.mask.pmins"))) {
2792 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2793 } else if (IsX86 && (Name == "sse2.pminu.b" ||
2794 Name == "sse41.pminuw" ||
2795 Name == "sse41.pminud" ||
2796 Name.starts_with("avx2.pminu") ||
2797 Name.starts_with("avx512.mask.pminu"))) {
2798 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2799 } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2800 Name == "avx2.pmulu.dq" ||
2801 Name == "avx512.pmulu.dq.512" ||
2802 Name.starts_with("avx512.mask.pmulu.dq."))) {
2803 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2804 } else if (IsX86 && (Name == "sse41.pmuldq" ||
2805 Name == "avx2.pmul.dq" ||
2806 Name == "avx512.pmul.dq.512" ||
2807 Name.starts_with("avx512.mask.pmul.dq."))) {
2808 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2809 } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2810 Name == "sse2.cvtsi2sd" ||
2811 Name == "sse.cvtsi642ss" ||
2812 Name == "sse2.cvtsi642sd")) {
2813 Rep = Builder.CreateSIToFP(
2814 CI->getArgOperand(1),
2815 cast<VectorType>(CI->getType())->getElementType());
2816 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2817 } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2818 Rep = Builder.CreateUIToFP(
2819 CI->getArgOperand(1),
2820 cast<VectorType>(CI->getType())->getElementType());
2821 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2822 } else if (IsX86 && Name == "sse2.cvtss2sd") {
2823 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2824 Rep = Builder.CreateFPExt(
2825 Rep, cast<VectorType>(CI->getType())->getElementType());
2826 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2827 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2828 Name == "sse2.cvtdq2ps" ||
2829 Name == "avx.cvtdq2.pd.256" ||
2830 Name == "avx.cvtdq2.ps.256" ||
2831 Name.starts_with("avx512.mask.cvtdq2pd.") ||
2832 Name.starts_with("avx512.mask.cvtudq2pd.") ||
2833 Name.starts_with("avx512.mask.cvtdq2ps.") ||
2834 Name.starts_with("avx512.mask.cvtudq2ps.") ||
2835 Name.starts_with("avx512.mask.cvtqq2pd.") ||
2836 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2837 Name == "avx512.mask.cvtqq2ps.256" ||
2838 Name == "avx512.mask.cvtqq2ps.512" ||
2839 Name == "avx512.mask.cvtuqq2ps.256" ||
2840 Name == "avx512.mask.cvtuqq2ps.512" ||
2841 Name == "sse2.cvtps2pd" ||
2842 Name == "avx.cvt.ps2.pd.256" ||
2843 Name == "avx512.mask.cvtps2pd.128" ||
2844 Name == "avx512.mask.cvtps2pd.256")) {
2845 auto *DstTy = cast<FixedVectorType>(CI->getType());
2846 Rep = CI->getArgOperand(0);
2847 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2848
2849 unsigned NumDstElts = DstTy->getNumElements();
2850 if (NumDstElts < SrcTy->getNumElements()) {
2851 assert(NumDstElts == 2 && "Unexpected vector size");
2852 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2853 }
2854
2855 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2856 bool IsUnsigned = Name.contains("cvtu");
2857 if (IsPS2PD)
2858 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2859 else if (CI->arg_size() == 4 &&
2860 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2861 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2862 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2863 : Intrinsic::x86_avx512_sitofp_round;
2864 Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2865 { DstTy, SrcTy });
2866 Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2867 } else {
2868 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2869 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2870 }
2871
2872 if (CI->arg_size() >= 3)
2873 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2874 CI->getArgOperand(1));
2875 } else if (IsX86 && (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2876 Name.starts_with("vcvtph2ps."))) {
2877 auto *DstTy = cast<FixedVectorType>(CI->getType());
2878 Rep = CI->getArgOperand(0);
2879 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2880 unsigned NumDstElts = DstTy->getNumElements();
2881 if (NumDstElts != SrcTy->getNumElements()) {
2882 assert(NumDstElts == 4 && "Unexpected vector size");
2883 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2884 }
2885 Rep = Builder.CreateBitCast(
2886 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2887 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2888 if (CI->arg_size() >= 3)
2889 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2890 CI->getArgOperand(1));
2891 } else if (IsX86 && Name.starts_with("avx512.mask.load")) {
2892 // "avx512.mask.loadu." or "avx512.mask.load."
2893 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2894 Rep =
2895 upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2896 CI->getArgOperand(2), Aligned);
2897 } else if (IsX86 && Name.starts_with("avx512.mask.expand.load.")) {
2898 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2899 Type *PtrTy = ResultTy->getElementType();
2900
2901 // Cast the pointer to element type.
2902 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2903 llvm::PointerType::getUnqual(PtrTy));
2904
2905 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2906 ResultTy->getNumElements());
2907
2908 Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2909 Intrinsic::masked_expandload,
2910 ResultTy);
2911 Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2912 } else if (IsX86 && Name.starts_with("avx512.mask.compress.store.")) {
2913 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2914 Type *PtrTy = ResultTy->getElementType();
2915
2916 // Cast the pointer to element type.
2917 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2918 llvm::PointerType::getUnqual(PtrTy));
2919
2920 Value *MaskVec =
2921 getX86MaskVec(Builder, CI->getArgOperand(2),
2922 cast<FixedVectorType>(ResultTy)->getNumElements());
2923
2924 Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2925 Intrinsic::masked_compressstore,
2926 ResultTy);
2927 Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2928 } else if (IsX86 && (Name.starts_with("avx512.mask.compress.") ||
2929 Name.starts_with("avx512.mask.expand."))) {
2930 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2931
2932 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2933 ResultTy->getNumElements());
2934
2935 bool IsCompress = Name[12] == 'c';
2936 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2937 : Intrinsic::x86_avx512_mask_expand;
2938 Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2939 Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2940 MaskVec });
2941 } else if (IsX86 && Name.starts_with("xop.vpcom")) {
2942 bool IsSigned;
2943 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2944 Name.ends_with("uq"))
2945 IsSigned = false;
2946 else if (Name.ends_with("b") || Name.ends_with("w") || Name.ends_with("d") ||
2947 Name.ends_with("q"))
2948 IsSigned = true;
2949 else
2950 llvm_unreachable("Unknown suffix");
2951
2952 unsigned Imm;
2953 if (CI->arg_size() == 3) {
2954 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2955 } else {
2956 Name = Name.substr(9); // strip off "xop.vpcom"
2957 if (Name.starts_with("lt"))
2958 Imm = 0;
2959 else if (Name.starts_with("le"))
2960 Imm = 1;
2961 else if (Name.starts_with("gt"))
2962 Imm = 2;
2963 else if (Name.starts_with("ge"))
2964 Imm = 3;
2965 else if (Name.starts_with("eq"))
2966 Imm = 4;
2967 else if (Name.starts_with("ne"))
2968 Imm = 5;
2969 else if (Name.starts_with("false"))
2970 Imm = 6;
2971 else if (Name.starts_with("true"))
2972 Imm = 7;
2973 else
2974 llvm_unreachable("Unknown condition");
2975 }
2976
2977 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2978 } else if (IsX86 && Name.starts_with("xop.vpcmov")) {
2979 Value *Sel = CI->getArgOperand(2);
2980 Value *NotSel = Builder.CreateNot(Sel);
2981 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2982 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2983 Rep = Builder.CreateOr(Sel0, Sel1);
2984 } else if (IsX86 && (Name.starts_with("xop.vprot") ||
2985 Name.starts_with("avx512.prol") ||
2986 Name.starts_with("avx512.mask.prol"))) {
2987 Rep = upgradeX86Rotate(Builder, *CI, false);
2988 } else if (IsX86 && (Name.starts_with("avx512.pror") ||
2989 Name.starts_with("avx512.mask.pror"))) {
2990 Rep = upgradeX86Rotate(Builder, *CI, true);
2991 } else if (IsX86 && (Name.starts_with("avx512.vpshld.") ||
2992 Name.starts_with("avx512.mask.vpshld") ||
2993 Name.starts_with("avx512.maskz.vpshld"))) {
2994 bool ZeroMask = Name[11] == 'z';
2995 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2996 } else if (IsX86 && (Name.starts_with("avx512.vpshrd.") ||
2997 Name.starts_with("avx512.mask.vpshrd") ||
2998 Name.starts_with("avx512.maskz.vpshrd"))) {
2999 bool ZeroMask = Name[11] == 'z';
3000 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3001 } else if (IsX86 && Name == "sse42.crc32.64.8") {
3002 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
3003 Intrinsic::x86_sse42_crc32_32_8);
3004 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3005 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
3006 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3007 } else if (IsX86 && (Name.starts_with("avx.vbroadcast.s") ||
3008 Name.starts_with("avx512.vbroadcast.s"))) {
3009 // Replace broadcasts with a series of insertelements.
3010 auto *VecTy = cast<FixedVectorType>(CI->getType());
3011 Type *EltTy = VecTy->getElementType();
3012 unsigned EltNum = VecTy->getNumElements();
3013 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3014 Type *I32Ty = Type::getInt32Ty(C);
3015 Rep = PoisonValue::get(VecTy);
3016 for (unsigned I = 0; I < EltNum; ++I)
3017 Rep = Builder.CreateInsertElement(Rep, Load,
3018 ConstantInt::get(I32Ty, I));
3019 } else if (IsX86 && (Name.starts_with("sse41.pmovsx") ||
3020 Name.starts_with("sse41.pmovzx") ||
3021 Name.starts_with("avx2.pmovsx") ||
3022 Name.starts_with("avx2.pmovzx") ||
3023 Name.starts_with("avx512.mask.pmovsx") ||
3024 Name.starts_with("avx512.mask.pmovzx"))) {
3025 auto *DstTy = cast<FixedVectorType>(CI->getType());
3026 unsigned NumDstElts = DstTy->getNumElements();
3027
3028 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3029 SmallVector<int, 8> ShuffleMask(NumDstElts);
3030 for (unsigned i = 0; i != NumDstElts; ++i)
3031 ShuffleMask[i] = i;
3032
3033 Value *SV =
3034 Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3035
3036 bool DoSext = Name.contains("pmovsx");
3037 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
3038 : Builder.CreateZExt(SV, DstTy);
3039 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3040 if (CI->arg_size() == 3)
3041 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3042 CI->getArgOperand(1));
3043 } else if (Name == "avx512.mask.pmov.qd.256" ||
3044 Name == "avx512.mask.pmov.qd.512" ||
3045 Name == "avx512.mask.pmov.wb.256" ||
3046 Name == "avx512.mask.pmov.wb.512") {
3047 Type *Ty = CI->getArgOperand(1)->getType();
3048 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3049 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3050 CI->getArgOperand(1));
3051 } else if (IsX86 && (Name.starts_with("avx.vbroadcastf128") ||
3052 Name == "avx2.vbroadcasti128")) {
3053 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3054 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3055 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3056 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3057 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
3058 PointerType::getUnqual(VT));
3059 Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
3060 if (NumSrcElts == 2)
3061 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3062 else
3063 Rep = Builder.CreateShuffleVector(
3064 Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3065 } else if (IsX86 && (Name.starts_with("avx512.mask.shuf.i") ||
3066 Name.starts_with("avx512.mask.shuf.f"))) {
3067 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3068 Type *VT = CI->getType();
3069 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3070 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3071 unsigned ControlBitsMask = NumLanes - 1;
3072 unsigned NumControlBits = NumLanes / 2;
3073 SmallVector<int, 8> ShuffleMask(0);
3074
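// For example, a 512-bit shuffle with Imm = 0x4e (NumLanes = 4) takes lanes
// 2 and 3 from the first source and lanes 0 and 1 from the second.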
3075 for (unsigned l = 0; l != NumLanes; ++l) {
3076 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3077 // We actually need the other source.
3078 if (l >= NumLanes / 2)
3079 LaneMask += NumLanes;
3080 for (unsigned i = 0; i != NumElementsInLane; ++i)
3081 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3082 }
3083 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3084 CI->getArgOperand(1), ShuffleMask);
3085 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3086 CI->getArgOperand(3));
3087 } else if (IsX86 && (Name.starts_with("avx512.mask.broadcastf") ||
3088 Name.starts_with("avx512.mask.broadcasti"))) {
3089 unsigned NumSrcElts =
3090 cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3091 ->getNumElements();
3092 unsigned NumDstElts =
3093 cast<FixedVectorType>(CI->getType())->getNumElements();
3094
3095 SmallVector<int, 8> ShuffleMask(NumDstElts);
3096 for (unsigned i = 0; i != NumDstElts; ++i)
3097 ShuffleMask[i] = i % NumSrcElts;
3098
3099 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3100 CI->getArgOperand(0),
3101 ShuffleMask);
3102 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3103 CI->getArgOperand(1));
3104 } else if (IsX86 && (Name.starts_with("avx2.pbroadcast") ||
3105 Name.starts_with("avx2.vbroadcast") ||
3106 Name.starts_with("avx512.pbroadcast") ||
3107 Name.starts_with("avx512.mask.broadcast.s"))) {
3108 // Replace vp?broadcasts with a vector shuffle.
3109 Value *Op = CI->getArgOperand(0);
3110 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3111 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3112 SmallVector<int, 8> M;
3113 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3114 Rep = Builder.CreateShuffleVector(Op, M);
3115
3116 if (CI->arg_size() == 3)
3117 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3118 CI->getArgOperand(1));
3119 } else if (IsX86 && (Name.starts_with("sse2.padds.") ||
3120 Name.starts_with("avx2.padds.") ||
3121 Name.starts_with("avx512.padds.") ||
3122 Name.starts_with("avx512.mask.padds."))) {
3123 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3124 } else if (IsX86 && (Name.starts_with("sse2.psubs.") ||
3125 Name.starts_with("avx2.psubs.") ||
3126 Name.starts_with("avx512.psubs.") ||
3127 Name.starts_with("avx512.mask.psubs."))) {
3128 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3129 } else if (IsX86 && (Name.starts_with("sse2.paddus.") ||
3130 Name.starts_with("avx2.paddus.") ||
3131 Name.starts_with("avx512.mask.paddus."))) {
3132 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3133 } else if (IsX86 && (Name.starts_with("sse2.psubus.") ||
3134 Name.starts_with("avx2.psubus.") ||
3135 Name.starts_with("avx512.mask.psubus."))) {
3136 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3137 } else if (IsX86 && Name.starts_with("avx512.mask.palignr.")) {
3138 Rep = upgradeX86ALIGNIntrinsics(
3139 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3140 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4),
3141 false);
3142 } else if (IsX86 && Name.starts_with("avx512.mask.valign.")) {
3143 Rep = upgradeX86ALIGNIntrinsics(
3144 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3145 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4),
3146 true);
3147 } else if (IsX86 && (Name == "sse2.psll.dq" ||
3148 Name == "avx2.psll.dq")) {
3149 // 128/256-bit shift left specified in bits.
3150 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3151 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3152 Shift / 8); // Shift is in bits.
3153 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
3154 Name == "avx2.psrl.dq")) {
3155 // 128/256-bit shift right specified in bits.
3156 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3157 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3158 Shift / 8); // Shift is in bits.
3159 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
3160 Name == "avx2.psll.dq.bs" ||
3161 Name == "avx512.psll.dq.512")) {
3162 // 128/256/512-bit shift left specified in bytes.
3163 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3164 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3165 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
3166 Name == "avx2.psrl.dq.bs" ||
3167 Name == "avx512.psrl.dq.512")) {
3168 // 128/256/512-bit shift right specified in bytes.
3169 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3170 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3171 } else if (IsX86 && (Name == "sse41.pblendw" ||
3172 Name.starts_with("sse41.blendp") ||
3173 Name.starts_with("avx.blend.p") ||
3174 Name == "avx2.pblendw" ||
3175 Name.starts_with("avx2.pblendd."))) {
3176 Value *Op0 = CI->getArgOperand(0);
3177 Value *Op1 = CI->getArgOperand(1);
3178 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3179 auto *VecTy = cast<FixedVectorType>(CI->getType());
3180 unsigned NumElts = VecTy->getNumElements();
3181
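// Each immediate bit selects between the sources: a set bit takes the
// element from Op1 (numbered i + NumElts in the mask), a clear bit keeps
// the element from Op0. The bit position wraps every 8 elements.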
3182 SmallVector<int, 16> Idxs(NumElts);
3183 for (unsigned i = 0; i != NumElts; ++i)
3184 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
3185
3186 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3187 } else if (IsX86 && (Name.starts_with("avx.vinsertf128.") ||
3188 Name == "avx2.vinserti128" ||
3189 Name.starts_with("avx512.mask.insert"))) {
3190 Value *Op0 = CI->getArgOperand(0);
3191 Value *Op1 = CI->getArgOperand(1);
3192 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3193 unsigned DstNumElts =
3194 cast<FixedVectorType>(CI->getType())->getNumElements();
3195 unsigned SrcNumElts =
3196 cast<FixedVectorType>(Op1->getType())->getNumElements();
3197 unsigned Scale = DstNumElts / SrcNumElts;
3198
3199 // Mask off the high bits of the immediate value; hardware ignores those.
3200 Imm = Imm % Scale;
3201
3202 // Extend the second operand into a vector the size of the destination.
3203 SmallVector<int, 8> Idxs(DstNumElts);
3204 for (unsigned i = 0; i != SrcNumElts; ++i)
3205 Idxs[i] = i;
3206 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3207 Idxs[i] = SrcNumElts;
3208 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3209
3210 // Insert the second operand into the first operand.
3211
3212 // Note that there is no guarantee that instruction lowering will actually
3213 // produce a vinsertf128 instruction for the created shuffles. In
3214 // particular, the 0 immediate case involves no lane changes, so it can
3215 // be handled as a blend.
3216
3217 // Example of shuffle mask for 32-bit elements:
3218 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3219 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
3220
3221 // First fill with the identity mask.
3222 for (unsigned i = 0; i != DstNumElts; ++i)
3223 Idxs[i] = i;
3224 // Then replace the elements where we need to insert.
3225 for (unsigned i = 0; i != SrcNumElts; ++i)
3226 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3227 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3228
3229 // If the intrinsic has a mask operand, handle that.
3230 if (CI->arg_size() == 5)
3231 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3232 CI->getArgOperand(3));
3233 } else if (IsX86 && (Name.starts_with("avx.vextractf128.") ||
3234 Name == "avx2.vextracti128" ||
3235 Name.starts_with("avx512.mask.vextract"))) {
3236 Value *Op0 = CI->getArgOperand(0);
3237 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3238 unsigned DstNumElts =
3239 cast<FixedVectorType>(CI->getType())->getNumElements();
3240 unsigned SrcNumElts =
3241 cast<FixedVectorType>(Op0->getType())->getNumElements();
3242 unsigned Scale = SrcNumElts / DstNumElts;
3243
3244 // Mask off the high bits of the immediate value; hardware ignores those.
3245 Imm = Imm % Scale;
3246
3247 // Get indexes for the subvector of the input vector.
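// For example, extracting the upper half (Imm = 1) of a v8f32 source into a
// v4f32 result gives Idxs = <4, 5, 6, 7>.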
3248 SmallVector<int, 8> Idxs(DstNumElts);
3249 for (unsigned i = 0; i != DstNumElts; ++i) {
3250 Idxs[i] = i + (Imm * DstNumElts);
3251 }
3252 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3253
3254 // If the intrinsic has a mask operand, handle that.
3255 if (CI->arg_size() == 4)
3256 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3257 CI->getArgOperand(2));
3258 } else if (!IsX86 && Name == "stackprotectorcheck") {
3259 Rep = nullptr;
3260 } else if (IsX86 && (Name.starts_with("avx512.mask.perm.df.") ||
3261 Name.starts_with("avx512.mask.perm.di."))) {
3262 Value *Op0 = CI->getArgOperand(0);
3263 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3264 auto *VecTy = cast<FixedVectorType>(CI->getType());
3265 unsigned NumElts = VecTy->getNumElements();
3266
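// Each 2-bit immediate field selects an element within a 4-element group,
// e.g. Imm = 0x1b (0b00011011) reverses every group: <3, 2, 1, 0, 7, 6, 5, 4>.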
3267 SmallVector<int, 8> Idxs(NumElts);
3268 for (unsigned i = 0; i != NumElts; ++i)
3269 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3270
3271 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3272
3273 if (CI->arg_size() == 4)
3274 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3275 CI->getArgOperand(2));
3276 } else if (IsX86 && (Name.starts_with("avx.vperm2f128.") ||
3277 Name == "avx2.vperm2i128")) {
3278 // The immediate permute control byte looks like this:
3279 // [1:0] - select 128 bits from sources for low half of destination
3280 // [2] - ignore
3281 // [3] - zero low half of destination
3282 // [5:4] - select 128 bits from sources for high half of destination
3283 // [6] - ignore
3284 // [7] - zero high half of destination
3285
3286 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3287
3288 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3289 unsigned HalfSize = NumElts / 2;
3290 SmallVector<int, 8> ShuffleMask(NumElts);
3291
3292 // Determine which operand(s) are actually in use for this instruction.
3293 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3294 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3295
3296 // If needed, replace operands based on zero mask.
3297 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3298 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3299
3300 // Permute low half of result.
3301 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3302 for (unsigned i = 0; i < HalfSize; ++i)
3303 ShuffleMask[i] = StartIndex + i;
3304
3305 // Permute high half of result.
3306 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3307 for (unsigned i = 0; i < HalfSize; ++i)
3308 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3309
3310 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3311
3312 } else if (IsX86 && (Name.starts_with("avx.vpermil.") ||
3313 Name == "sse2.pshuf.d" ||
3314 Name.starts_with("avx512.mask.vpermil.p") ||
3315 Name.starts_with("avx512.mask.pshuf.d."))) {
3316 Value *Op0 = CI->getArgOperand(0);
3317 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3318 auto *VecTy = cast<FixedVectorType>(CI->getType());
3319 unsigned NumElts = VecTy->getNumElements();
3320 // Calculate the size of each index in the immediate.
3321 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3322 unsigned IdxMask = ((1 << IdxSize) - 1);
3323
3324 SmallVector<int, 8> Idxs(NumElts);
3325 // Look up the bits for this element, wrapping around the immediate every
3326 // 8 bits. Elements are grouped into sets of 2 or 4 elements so we need
3327 // to offset by the first index of each group.
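// For example, sse2.pshuf.d with Imm = 0x1b (IdxSize = 2) yields the
// reversed mask <3, 2, 1, 0>.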
3328 for (unsigned i = 0; i != NumElts; ++i)
3329 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3330
3331 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3332
3333 if (CI->arg_size() == 4)
3334 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3335 CI->getArgOperand(2));
3336 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
3337 Name.starts_with("avx512.mask.pshufl.w."))) {
3338 Value *Op0 = CI->getArgOperand(0);
3339 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3340 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3341
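// Shuffle the low four words of each 128-bit lane and pass the high four
// through, e.g. Imm = 0x1b gives <3, 2, 1, 0, 4, 5, 6, 7> per lane.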
3342 SmallVector<int, 16> Idxs(NumElts);
3343 for (unsigned l = 0; l != NumElts; l += 8) {
3344 for (unsigned i = 0; i != 4; ++i)
3345 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3346 for (unsigned i = 4; i != 8; ++i)
3347 Idxs[i + l] = i + l;
3348 }
3349
3350 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3351
3352 if (CI->arg_size() == 4)
3353 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3354 CI->getArgOperand(2));
3355 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
3356 Name.starts_with("avx512.mask.pshufh.w."))) {
3357 Value *Op0 = CI->getArgOperand(0);
3358 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3359 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3360
3361 SmallVector<int, 16> Idxs(NumElts);
3362 for (unsigned l = 0; l != NumElts; l += 8) {
3363 for (unsigned i = 0; i != 4; ++i)
3364 Idxs[i + l] = i + l;
3365 for (unsigned i = 0; i != 4; ++i)
3366 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3367 }
3368
3369 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3370
3371 if (CI->arg_size() == 4)
3372 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3373 CI->getArgOperand(2));
3374 } else if (IsX86 && Name.starts_with("avx512.mask.shuf.p")) {
3375 Value *Op0 = CI->getArgOperand(0);
3376 Value *Op1 = CI->getArgOperand(1);
3377 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3378 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3379
3380 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3381 unsigned HalfLaneElts = NumLaneElts / 2;
3382
3383 SmallVector<int, 16> Idxs(NumElts);
3384 for (unsigned i = 0; i != NumElts; ++i) {
3385 // Base index is the starting element of the lane.
3386 Idxs[i] = i - (i % NumLaneElts);
3387 // If we are halfway through the lane, switch to the other source.
3388 if ((i % NumLaneElts) >= HalfLaneElts)
3389 Idxs[i] += NumElts;
3390 // Now select the specific element by adding HalfLaneElts bits from
3391 // the immediate, wrapping around the immediate every 8 bits.
3392 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3393 }
3394
3395 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3396
3397 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3398 CI->getArgOperand(3));
3399 } else if (IsX86 && (Name.starts_with("avx512.mask.movddup") ||
3400 Name.starts_with("avx512.mask.movshdup") ||
3401 Name.starts_with("avx512.mask.movsldup"))) {
3402 Value *Op0 = CI->getArgOperand(0);
3403 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3404 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3405
3406 unsigned Offset = 0;
3407 if (Name.starts_with("avx512.mask.movshdup."))
3408 Offset = 1;
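// movsldup/movddup duplicate the even-numbered elements (Offset = 0),
// movshdup the odd ones, e.g. movshdup on v4f32 produces <1, 1, 3, 3>.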
3409
3410 SmallVector<int, 16> Idxs(NumElts);
3411 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3412 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3413 Idxs[i + l + 0] = i + l + Offset;
3414 Idxs[i + l + 1] = i + l + Offset;
3415 }
3416
3417 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3418
3419 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3420 CI->getArgOperand(1));
3421 } else if (IsX86 && (Name.starts_with("avx512.mask.punpckl") ||
3422 Name.starts_with("avx512.mask.unpckl."))) {
3423 Value *Op0 = CI->getArgOperand(0);
3424 Value *Op1 = CI->getArgOperand(1);
3425 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3426 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3427
3428 SmallVector<int, 64> Idxs(NumElts);
3429 for (int l = 0; l != NumElts; l += NumLaneElts)
3430 for (int i = 0; i != NumLaneElts; ++i)
3431 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3432
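// This interleaves the low halves of each 128-bit lane, e.g. unpcklps on
// v4f32 produces the mask <0, 4, 1, 5> (second-source elements are numbered
// from NumElts).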
3433 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3434
3435 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3436 CI->getArgOperand(2));
3437 } else if (IsX86 && (Name.starts_with("avx512.mask.punpckh") ||
3438 Name.starts_with("avx512.mask.unpckh."))) {
3439 Value *Op0 = CI->getArgOperand(0);
3440 Value *Op1 = CI->getArgOperand(1);
3441 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3442 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3443
3444 SmallVector<int, 64> Idxs(NumElts);
3445 for (int l = 0; l != NumElts; l += NumLaneElts)
3446 for (int i = 0; i != NumLaneElts; ++i)
3447 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3448
3449 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3450
3451 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3452 CI->getArgOperand(2));
3453 } else if (IsX86 && (Name.starts_with("avx512.mask.and.") ||
3454 Name.starts_with("avx512.mask.pand."))) {
3455 VectorType *FTy = cast<VectorType>(CI->getType());
3456 VectorType *ITy = VectorType::getInteger(FTy);
3457 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3458 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3459 Rep = Builder.CreateBitCast(Rep, FTy);
3460 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3461 CI->getArgOperand(2));
3462 } else if (IsX86 && (Name.starts_with("avx512.mask.andn.") ||
3463 Name.starts_with("avx512.mask.pandn."))) {
3464 VectorType *FTy = cast<VectorType>(CI->getType());
3465 VectorType *ITy = VectorType::getInteger(FTy);
3466 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3467 Rep = Builder.CreateAnd(Rep,
3468 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3469 Rep = Builder.CreateBitCast(Rep, FTy);
3470 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3471 CI->getArgOperand(2));
3472 } else if (IsX86 && (Name.starts_with("avx512.mask.or.") ||
3473 Name.starts_with("avx512.mask.por."))) {
3474 VectorType *FTy = cast<VectorType>(CI->getType());
3475 VectorType *ITy = VectorType::getInteger(FTy);
3476 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3477 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3478 Rep = Builder.CreateBitCast(Rep, FTy);
3479 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3480 CI->getArgOperand(2));
3481 } else if (IsX86 && (Name.starts_with("avx512.mask.xor.") ||
3482 Name.starts_with("avx512.mask.pxor."))) {
3483 VectorType *FTy = cast<VectorType>(CI->getType());
3484 VectorType *ITy = VectorType::getInteger(FTy);
3485 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3486 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3487 Rep = Builder.CreateBitCast(Rep, FTy);
3488 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3489 CI->getArgOperand(2));
3490 } else if (IsX86 && Name.starts_with("avx512.mask.padd.")) {
3491 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3492 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3493 CI->getArgOperand(2));
3494 } else if (IsX86 && Name.starts_with("avx512.mask.psub.")) {
3495 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3496 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3497 CI->getArgOperand(2));
3498 } else if (IsX86 && Name.starts_with("avx512.mask.pmull.")) {
3499 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3500 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3501 CI->getArgOperand(2));
3502 } else if (IsX86 && Name.starts_with("avx512.mask.add.p")) {
3503 if (Name.ends_with(".512")) {
3504 Intrinsic::ID IID;
3505 if (Name[17] == 's')
3506 IID = Intrinsic::x86_avx512_add_ps_512;
3507 else
3508 IID = Intrinsic::x86_avx512_add_pd_512;
3509
3510 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3511 { CI->getArgOperand(0), CI->getArgOperand(1),
3512 CI->getArgOperand(4) });
3513 } else {
3514 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3515 }
3516 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3517 CI->getArgOperand(2));
3518 } else if (IsX86 && Name.starts_with("avx512.mask.div.p")) {
3519 if (Name.ends_with(".512")) {
3520 Intrinsic::ID IID;
3521 if (Name[17] == 's')
3522 IID = Intrinsic::x86_avx512_div_ps_512;
3523 else
3524 IID = Intrinsic::x86_avx512_div_pd_512;
3525
3526 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3527 { CI->getArgOperand(0), CI->getArgOperand(1),
3528 CI->getArgOperand(4) });
3529 } else {
3530 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3531 }
3532 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3533 CI->getArgOperand(2));
3534 } else if (IsX86 && Name.starts_with("avx512.mask.mul.p")) {
3535 if (Name.ends_with(".512")) {
3536 Intrinsic::ID IID;
3537 if (Name[17] == 's')
3538 IID = Intrinsic::x86_avx512_mul_ps_512;
3539 else
3540 IID = Intrinsic::x86_avx512_mul_pd_512;
3541
3542 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3543 { CI->getArgOperand(0), CI->getArgOperand(1),
3544 CI->getArgOperand(4) });
3545 } else {
3546 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3547 }
3548 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3549 CI->getArgOperand(2));
3550 } else if (IsX86 && Name.starts_with("avx512.mask.sub.p")) {
3551 if (Name.ends_with(".512")) {
3552 Intrinsic::ID IID;
3553 if (Name[17] == 's')
3554 IID = Intrinsic::x86_avx512_sub_ps_512;
3555 else
3556 IID = Intrinsic::x86_avx512_sub_pd_512;
3557
3558 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3559 { CI->getArgOperand(0), CI->getArgOperand(1),
3560 CI->getArgOperand(4) });
3561 } else {
3562 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3563 }
3564 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3565 CI->getArgOperand(2));
3566 } else if (IsX86 && (Name.starts_with("avx512.mask.max.p") ||
3567 Name.starts_with("avx512.mask.min.p")) &&
3568 Name.drop_front(18) == ".512") {
3569 bool IsDouble = Name[17] == 'd';
3570 bool IsMin = Name[13] == 'i';
3571 static const Intrinsic::ID MinMaxTbl[2][2] = {
3572 { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
3573 { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
3574 };
3575 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3576
3577 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3578 { CI->getArgOperand(0), CI->getArgOperand(1),
3579 CI->getArgOperand(4) });
3580 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3581 CI->getArgOperand(2));
3582 } else if (IsX86 && Name.starts_with("avx512.mask.lzcnt.")) {
3583 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
3584 Intrinsic::ctlz,
3585 CI->getType()),
3586 { CI->getArgOperand(0), Builder.getInt1(false) });
3587 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3588 CI->getArgOperand(1));
3589 } else if (IsX86 && Name.starts_with("avx512.mask.psll")) {
3590 bool IsImmediate = Name[16] == 'i' ||
3591 (Name.size() > 18 && Name[18] == 'i');
3592 bool IsVariable = Name[16] == 'v';
3593 char Size = Name[16] == '.' ? Name[17] :
3594 Name[17] == '.' ? Name[18] :
3595 Name[18] == '.' ? Name[19] :
3596 Name[20];
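// e.g. "avx512.mask.psll.d.128" gives Size 'd', "avx512.mask.pslli.q" is the
// immediate form with Size 'q', and "avx512.mask.psllv2.di" is a variable
// shift of 2 x i64.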
3597
3598 Intrinsic::ID IID;
3599 if (IsVariable && Name[17] != '.') {
3600 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3601 IID = Intrinsic::x86_avx2_psllv_q;
3602 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3603 IID = Intrinsic::x86_avx2_psllv_q_256;
3604 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3605 IID = Intrinsic::x86_avx2_psllv_d;
3606 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3607 IID = Intrinsic::x86_avx2_psllv_d_256;
3608 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3609 IID = Intrinsic::x86_avx512_psllv_w_128;
3610 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3611 IID = Intrinsic::x86_avx512_psllv_w_256;
3612 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3613 IID = Intrinsic::x86_avx512_psllv_w_512;
3614 else
3615 llvm_unreachable("Unexpected size");
3616 } else if (Name.ends_with(".128")) {
3617 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3618 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3619 : Intrinsic::x86_sse2_psll_d;
3620 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3621 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3622 : Intrinsic::x86_sse2_psll_q;
3623 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3624 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3625 : Intrinsic::x86_sse2_psll_w;
3626 else
3627 llvm_unreachable("Unexpected size");
3628 } else if (Name.ends_with(".256")) {
3629 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3630 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3631 : Intrinsic::x86_avx2_psll_d;
3632 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3633 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3634 : Intrinsic::x86_avx2_psll_q;
3635 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3636 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3637 : Intrinsic::x86_avx2_psll_w;
3638 else
3639 llvm_unreachable("Unexpected size");
3640 } else {
3641 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3642 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3643 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
3644 Intrinsic::x86_avx512_psll_d_512;
3645 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3646 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3647 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
3648 Intrinsic::x86_avx512_psll_q_512;
3649 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3650 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3651 : Intrinsic::x86_avx512_psll_w_512;
3652 else
3653 llvm_unreachable("Unexpected size");
3654 }
3655
3656 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3657 } else if (IsX86 && Name.starts_with("avx512.mask.psrl")) {
3658 bool IsImmediate = Name[16] == 'i' ||
3659 (Name.size() > 18 && Name[18] == 'i');
3660 bool IsVariable = Name[16] == 'v';
3661 char Size = Name[16] == '.' ? Name[17] :
3662 Name[17] == '.' ? Name[18] :
3663 Name[18] == '.' ? Name[19] :
3664 Name[20];
3665
3666 Intrinsic::ID IID;
3667 if (IsVariable && Name[17] != '.') {
3668 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3669 IID = Intrinsic::x86_avx2_psrlv_q;
3670 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3671 IID = Intrinsic::x86_avx2_psrlv_q_256;
3672 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3673 IID = Intrinsic::x86_avx2_psrlv_d;
3674 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3675 IID = Intrinsic::x86_avx2_psrlv_d_256;
3676 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3677 IID = Intrinsic::x86_avx512_psrlv_w_128;
3678 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3679 IID = Intrinsic::x86_avx512_psrlv_w_256;
3680 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3681 IID = Intrinsic::x86_avx512_psrlv_w_512;
3682 else
3683 llvm_unreachable("Unexpected size");
3684 } else if (Name.ends_with(".128")) {
3685 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3686 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3687 : Intrinsic::x86_sse2_psrl_d;
3688 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3689 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3690 : Intrinsic::x86_sse2_psrl_q;
3691 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3692 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3693 : Intrinsic::x86_sse2_psrl_w;
3694 else
3695 llvm_unreachable("Unexpected size");
3696 } else if (Name.ends_with(".256")) {
3697 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3698 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3699 : Intrinsic::x86_avx2_psrl_d;
3700 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3701 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3702 : Intrinsic::x86_avx2_psrl_q;
3703 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3704 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3705 : Intrinsic::x86_avx2_psrl_w;
3706 else
3707 llvm_unreachable("Unexpected size");
3708 } else {
3709 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3710 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3711 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
3712 Intrinsic::x86_avx512_psrl_d_512;
3713 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3714 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3715 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
3716 Intrinsic::x86_avx512_psrl_q_512;
3717 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3718 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3719 : Intrinsic::x86_avx512_psrl_w_512;
3720 else
3721 llvm_unreachable("Unexpected size");
3722 }
3723
3724 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3725 } else if (IsX86 && Name.starts_with("avx512.mask.psra")) {
3726 bool IsImmediate = Name[16] == 'i' ||
3727 (Name.size() > 18 && Name[18] == 'i');
3728 bool IsVariable = Name[16] == 'v';
3729 char Size = Name[16] == '.' ? Name[17] :
3730 Name[17] == '.' ? Name[18] :
3731 Name[18] == '.' ? Name[19] :
3732 Name[20];
3733
3734 Intrinsic::ID IID;
3735 if (IsVariable && Name[17] != '.') {
3736 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3737 IID = Intrinsic::x86_avx2_psrav_d;
3738 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3739 IID = Intrinsic::x86_avx2_psrav_d_256;
3740 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3741 IID = Intrinsic::x86_avx512_psrav_w_128;
3742 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3743 IID = Intrinsic::x86_avx512_psrav_w_256;
3744 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3745 IID = Intrinsic::x86_avx512_psrav_w_512;
3746 else
3747 llvm_unreachable("Unexpected size");
3748 } else if (Name.ends_with(".128")) {
3749 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3750 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3751 : Intrinsic::x86_sse2_psra_d;
3752 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3753 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3754 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
3755 Intrinsic::x86_avx512_psra_q_128;
3756 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3757 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3758 : Intrinsic::x86_sse2_psra_w;
3759 else
3760 llvm_unreachable("Unexpected size");
3761 } else if (Name.ends_with(".256")) {
3762 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3763 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3764 : Intrinsic::x86_avx2_psra_d;
3765 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3766 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3767 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
3768 Intrinsic::x86_avx512_psra_q_256;
3769 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3770 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3771 : Intrinsic::x86_avx2_psra_w;
3772 else
3773 llvm_unreachable("Unexpected size");
3774 } else {
3775 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3776 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3777 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
3778 Intrinsic::x86_avx512_psra_d_512;
3779 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3780 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3781 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
3782 Intrinsic::x86_avx512_psra_q_512;
3783 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3784 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3785 : Intrinsic::x86_avx512_psra_w_512;
3786 else
3787 llvm_unreachable("Unexpected size");
3788 }
3789
3790 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3791 } else if (IsX86 && Name.starts_with("avx512.mask.move.s")) {
3792 Rep = upgradeMaskedMove(Builder, *CI);
3793 } else if (IsX86 && Name.starts_with("avx512.cvtmask2")) {
3794 Rep = upgradeMaskToInt(Builder, *CI);
3795 } else if (IsX86 && Name.ends_with(".movntdqa")) {
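// Non-temporal loads no longer have an intrinsic form; emit an ordinary
// aligned load tagged with !nontemporal metadata instead.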
3796 MDNode *Node = MDNode::get(
3797 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3798
3799 Value *Ptr = CI->getArgOperand(0);
3800
3801 // Convert the type of the pointer to a pointer to the stored type.
3802 Value *BC = Builder.CreateBitCast(
3803 Ptr, PointerType::getUnqual(CI->getType()), "cast");
3804 LoadInst *LI = Builder.CreateAlignedLoad(
3805 CI->getType(), BC,
3806 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3807 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3808 Rep = LI;
3809 } else if (IsX86 && (Name.starts_with("fma.vfmadd.") ||
3810 Name.starts_with("fma.vfmsub.") ||
3811 Name.starts_with("fma.vfnmadd.") ||
3812 Name.starts_with("fma.vfnmsub."))) {
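// Decode the variant from the name: 'n' negates the product (vfnmadd,
// vfnmsub), a following 's' negates the addend (vfmsub, vfnmsub), and an
// 's' right after the second dot marks the scalar form.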
3813 bool NegMul = Name[6] == 'n';
3814 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3815 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3816
3817 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3818 CI->getArgOperand(2) };
3819
3820 if (IsScalar) {
3821 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3822 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3823 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3824 }
3825
3826 if (NegMul && !IsScalar)
3827 Ops[0] = Builder.CreateFNeg(Ops[0]);
3828 if (NegMul && IsScalar)
3829 Ops[1] = Builder.CreateFNeg(Ops[1]);
3830 if (NegAcc)
3831 Ops[2] = Builder.CreateFNeg(Ops[2]);
3832
3833 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3834 Intrinsic::fma,
3835 Ops[0]->getType()),
3836 Ops);
3837
3838 if (IsScalar)
3839 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3840 (uint64_t)0);
3841 } else if (IsX86 && Name.starts_with("fma4.vfmadd.s")) {
3842 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3843 CI->getArgOperand(2) };
3844
3845 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3846 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3847 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3848
3849 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3850 Intrinsic::fma,
3851 Ops[0]->getType()),
3852 Ops);
3853
3854 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3855 Rep, (uint64_t)0);
3856 } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.s") ||
3857 Name.starts_with("avx512.maskz.vfmadd.s") ||
3858 Name.starts_with("avx512.mask3.vfmadd.s") ||
3859 Name.starts_with("avx512.mask3.vfmsub.s") ||
3860 Name.starts_with("avx512.mask3.vfnmsub.s"))) {
3861 bool IsMask3 = Name[11] == '3';
3862 bool IsMaskZ = Name[11] == 'z';
3863 // Drop the "avx512.mask." to make it easier.
3864 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3865 bool NegMul = Name[2] == 'n';
3866 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3867
3868 Value *A = CI->getArgOperand(0);
3869 Value *B = CI->getArgOperand(1);
3870 Value *C = CI->getArgOperand(2);
3871
3872 if (NegMul && (IsMask3 || IsMaskZ))
3873 A = Builder.CreateFNeg(A);
3874 if (NegMul && !(IsMask3 || IsMaskZ))
3875 B = Builder.CreateFNeg(B);
3876 if (NegAcc)
3877 C = Builder.CreateFNeg(C);
3878
3879 A = Builder.CreateExtractElement(A, (uint64_t)0);
3880 B = Builder.CreateExtractElement(B, (uint64_t)0);
3881 C = Builder.CreateExtractElement(C, (uint64_t)0);
3882
3883 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3884 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3885 Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3886
3887 Intrinsic::ID IID;
3888 if (Name.back() == 'd')
3889 IID = Intrinsic::x86_avx512_vfmadd_f64;
3890 else
3891 IID = Intrinsic::x86_avx512_vfmadd_f32;
3892 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3893 Rep = Builder.CreateCall(FMA, Ops);
3894 } else {
3895 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3896 Intrinsic::fma,
3897 A->getType());
3898 Rep = Builder.CreateCall(FMA, { A, B, C });
3899 }
3900
3901 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3902 IsMask3 ? C : A;
3903
3904 // For Mask3 with NegAcc, we need to create a new extractelement that
3905 // avoids the negation above.
3906 if (NegAcc && IsMask3)
3907 PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3908 (uint64_t)0);
3909
3910 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3911 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3912 Rep, (uint64_t)0);
3913 } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.p") ||
3914 Name.starts_with("avx512.mask.vfnmadd.p") ||
3915 Name.starts_with("avx512.mask.vfnmsub.p") ||
3916 Name.starts_with("avx512.mask3.vfmadd.p") ||
3917 Name.starts_with("avx512.mask3.vfmsub.p") ||
3918 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3919 Name.starts_with("avx512.maskz.vfmadd.p"))) {
3920 bool IsMask3 = Name[11] == '3';
3921 bool IsMaskZ = Name[11] == 'z';
3922 // Drop the "avx512.mask." to make it easier.
3923 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3924 bool NegMul = Name[2] == 'n';
3925 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3926
3927 Value *A = CI->getArgOperand(0);
3928 Value *B = CI->getArgOperand(1);
3929 Value *C = CI->getArgOperand(2);
3930
3931 if (NegMul && (IsMask3 || IsMaskZ))
3932 A = Builder.CreateFNeg(A);
3933 if (NegMul && !(IsMask3 || IsMaskZ))
3934 B = Builder.CreateFNeg(B);
3935 if (NegAcc)
3936 C = Builder.CreateFNeg(C);
3937
3938 if (CI->arg_size() == 5 &&
3939 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3940 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3941 Intrinsic::ID IID;
3942 // Check the character before ".512" in the string.
3943 if (Name[Name.size()-5] == 's')
3944 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3945 else
3946 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3947
3948 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3949 { A, B, C, CI->getArgOperand(4) });
3950 } else {
3951 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3952 Intrinsic::fma,
3953 A->getType());
3954 Rep = Builder.CreateCall(FMA, { A, B, C });
3955 }
3956
3957 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3958 IsMask3 ? CI->getArgOperand(2) :
3959 CI->getArgOperand(0);
3960
3961 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3962 } else if (IsX86 && Name.starts_with("fma.vfmsubadd.p")) {
3963 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3964 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3965 Intrinsic::ID IID;
3966 if (VecWidth == 128 && EltWidth == 32)
3967 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3968 else if (VecWidth == 256 && EltWidth == 32)
3969 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3970 else if (VecWidth == 128 && EltWidth == 64)
3971 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3972 else if (VecWidth == 256 && EltWidth == 64)
3973 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3974 else
3975 llvm_unreachable("Unexpected intrinsic");
3976
3977 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3978 CI->getArgOperand(2) };
3979 Ops[2] = Builder.CreateFNeg(Ops[2]);
3980 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3981 Ops);
3982 } else if (IsX86 && (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3983 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3984 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3985 Name.starts_with("avx512.mask3.vfmsubadd.p"))) {
3986 bool IsMask3 = Name[11] == '3';
3987 bool IsMaskZ = Name[11] == 'z';
3988 // Drop the "avx512.mask." to make it easier.
3989 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3990 bool IsSubAdd = Name[3] == 's';
3991 if (CI->arg_size() == 5) {
3992 Intrinsic::ID IID;
3993 // Check the character before ".512" in the string.
3994 if (Name[Name.size()-5] == 's')
3995 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3996 else
3997 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3998
3999 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4000 CI->getArgOperand(2), CI->getArgOperand(4) };
4001 if (IsSubAdd)
4002 Ops[2] = Builder.CreateFNeg(Ops[2]);
4003
4004 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
4005 Ops);
4006 } else {
4007 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4008
4009 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4010 CI->getArgOperand(2) };
4011
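// Without a rounding-mode operand this decomposes into two plain fmas, one
// with the addend negated, which are then interleaved by element parity
// with a shuffle.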
4012 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
4013 Ops[0]->getType());
4014 Value *Odd = Builder.CreateCall(FMA, Ops);
4015 Ops[2] = Builder.CreateFNeg(Ops[2]);
4016 Value *Even = Builder.CreateCall(FMA, Ops);
4017
4018 if (IsSubAdd)
4019 std::swap(Even, Odd);
4020
4021 SmallVector<int, 32> Idxs(NumElts);
4022 for (int i = 0; i != NumElts; ++i)
4023 Idxs[i] = i + (i % 2) * NumElts;
4024
4025 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4026 }
4027
4028 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
4029 IsMask3 ? CI->getArgOperand(2) :
4030 CI->getArgOperand(0);
4031
4032 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4033 } else if (IsX86 && (Name.starts_with("avx512.mask.pternlog.") ||
4034 Name.starts_with("avx512.maskz.pternlog."))) {
4035 bool ZeroMask = Name[11] == 'z';
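// The unmasked pternlog intrinsic still exists; call it and apply the mask
// as an explicit select against either zero or the pass-through operand.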
4036 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4037 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4038 Intrinsic::ID IID;
4039 if (VecWidth == 128 && EltWidth == 32)
4040 IID = Intrinsic::x86_avx512_pternlog_d_128;
4041 else if (VecWidth == 256 && EltWidth == 32)
4042 IID = Intrinsic::x86_avx512_pternlog_d_256;
4043 else if (VecWidth == 512 && EltWidth == 32)
4044 IID = Intrinsic::x86_avx512_pternlog_d_512;
4045 else if (VecWidth == 128 && EltWidth == 64)
4046 IID = Intrinsic::x86_avx512_pternlog_q_128;
4047 else if (VecWidth == 256 && EltWidth == 64)
4048 IID = Intrinsic::x86_avx512_pternlog_q_256;
4049 else if (VecWidth == 512 && EltWidth == 64)
4050 IID = Intrinsic::x86_avx512_pternlog_q_512;
4051 else
4052 llvm_unreachable("Unexpected intrinsic");
4053
4054 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
4055 CI->getArgOperand(2), CI->getArgOperand(3) };
4056 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4057 Args);
4058 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4059 : CI->getArgOperand(0);
4060 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4061 } else if (IsX86 && (Name.starts_with("avx512.mask.vpmadd52") ||
4062 Name.starts_with("avx512.maskz.vpmadd52"))) {
4063 bool ZeroMask = Name[11] == 'z';
4064 bool High = Name[20] == 'h' || Name[21] == 'h';
4065 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4066 Intrinsic::ID IID;
4067 if (VecWidth == 128 && !High)
4068 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4069 else if (VecWidth == 256 && !High)
4070 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4071 else if (VecWidth == 512 && !High)
4072 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4073 else if (VecWidth == 128 && High)
4074 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4075 else if (VecWidth == 256 && High)
4076 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4077 else if (VecWidth == 512 && High)
4078 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4079 else
4080 llvm_unreachable("Unexpected intrinsic");
4081
4082 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
4083 CI->getArgOperand(2) };
4084 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4085 Args);
4086 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4087 : CI->getArgOperand(0);
4088 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4089 } else if (IsX86 && (Name.starts_with("avx512.mask.vpermi2var.") ||
4090 Name.starts_with("avx512.mask.vpermt2var.") ||
4091 Name.starts_with("avx512.maskz.vpermt2var."))) {
4092 bool ZeroMask = Name[11] == 'z';
4093 bool IndexForm = Name[17] == 'i';
4094 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4095 } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpbusd.") ||
4096 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4097 Name.starts_with("avx512.mask.vpdpbusds.") ||
4098 Name.starts_with("avx512.maskz.vpdpbusds."))) {
4099 bool ZeroMask = Name[11] == 'z';
4100 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4101 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4102 Intrinsic::ID IID;
4103 if (VecWidth == 128 && !IsSaturating)
4104 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4105 else if (VecWidth == 256 && !IsSaturating)
4106 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4107 else if (VecWidth == 512 && !IsSaturating)
4108 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4109 else if (VecWidth == 128 && IsSaturating)
4110 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4111 else if (VecWidth == 256 && IsSaturating)
4112 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4113 else if (VecWidth == 512 && IsSaturating)
4114 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4115 else
4116 llvm_unreachable("Unexpected intrinsic");
4117
4118 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4119 CI->getArgOperand(2) };
4120 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4121 Args);
4122 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4123 : CI->getArgOperand(0);
4124 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4125 } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpwssd.") ||
4126 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4127 Name.starts_with("avx512.mask.vpdpwssds.") ||
4128 Name.starts_with("avx512.maskz.vpdpwssds."))) {
4129 bool ZeroMask = Name[11] == 'z';
4130 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4131 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4132 Intrinsic::ID IID;
4133 if (VecWidth == 128 && !IsSaturating)
4134 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4135 else if (VecWidth == 256 && !IsSaturating)
4136 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4137 else if (VecWidth == 512 && !IsSaturating)
4138 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4139 else if (VecWidth == 128 && IsSaturating)
4140 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4141 else if (VecWidth == 256 && IsSaturating)
4142 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4143 else if (VecWidth == 512 && IsSaturating)
4144 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4145 else
4146 llvm_unreachable("Unexpected intrinsic");
4147
4148 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4149 CI->getArgOperand(2) };
4150 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4151 Args);
4152 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4153 : CI->getArgOperand(0);
4154 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4155 } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4156 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4157 Name == "subborrow.u32" || Name == "subborrow.u64")) {
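// Decode the flavour from the name: the leading character picks add/sub and
// the trailing digit the width ("...u32" ends in '2', "...u64" in '4').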
4158 Intrinsic::ID IID;
4159 if (Name[0] == 'a' && Name.back() == '2')
4160 IID = Intrinsic::x86_addcarry_32;
4161 else if (Name[0] == 'a' && Name.back() == '4')
4162 IID = Intrinsic::x86_addcarry_64;
4163 else if (Name[0] == 's' && Name.back() == '2')
4164 IID = Intrinsic::x86_subborrow_32;
4165 else if (Name[0] == 's' && Name.back() == '4')
4166 IID = Intrinsic::x86_subborrow_64;
4167 else
4168 llvm_unreachable("Unexpected intrinsic");
4169
4170 // Make a call with 3 operands.
4171 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4172 CI->getArgOperand(2)};
4173 Value *NewCall = Builder.CreateCall(
4174 Intrinsic::getDeclaration(CI->getModule(), IID),
4175 Args);
4176
4177 // Extract the second result and store it.
4178 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4179 // Cast the pointer to the right type.
4180 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
4181 llvm::PointerType::getUnqual(Data->getType()));
4182 Builder.CreateAlignedStore(Data, Ptr, Align(1));
4183 // Replace the original call result with the first result of the new call.
4184 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4185
4186 CI->replaceAllUsesWith(CF);
4187 Rep = nullptr;
4188 } else if (IsX86 && Name.starts_with("avx512.mask.") &&
4189 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4190 // Rep will be updated by the call in the condition.
4191 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
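// Expand to a compare-and-select: abs(x) == x >= 0 ? x : -x.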
4192 Value *Arg = CI->getArgOperand(0);
4193 Value *Neg = Builder.CreateNeg(Arg, "neg");
4194 Value *Cmp = Builder.CreateICmpSGE(
4195 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
4196 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
4197 } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
4198 Name.starts_with("atomic.load.add.f64.p"))) {
4199 Value *Ptr = CI->getArgOperand(0);
4200 Value *Val = CI->getArgOperand(1);
4201 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
4202 AtomicOrdering::SequentiallyConsistent);
4203 } else if (IsNVVM && Name.consume_front("max.") &&
4204 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4205 Name == "ui" || Name == "ull")) {
4206 Value *Arg0 = CI->getArgOperand(0);
4207 Value *Arg1 = CI->getArgOperand(1);
4208 Value *Cmp = Name.starts_with("u")
4209 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
4210 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
4211 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
4212 } else if (IsNVVM && Name.consume_front("min.") &&
4213 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4214 Name == "ui" || Name == "ull")) {
4215 Value *Arg0 = CI->getArgOperand(0);
4216 Value *Arg1 = CI->getArgOperand(1);
4217 Value *Cmp = Name.starts_with("u")
4218 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
4219 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
4220 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
4221 } else if (IsNVVM && Name == "clz.ll") {
4222 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4223 Value *Arg = CI->getArgOperand(0);
4224 Value *Ctlz = Builder.CreateCall(
4225 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
4226 {Arg->getType()}),
4227 {Arg, Builder.getFalse()}, "ctlz");
4228 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
4229 } else if (IsNVVM && Name == "popc.ll") {
4230 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
4231 // i64.
4232 Value *Arg = CI->getArgOperand(0);
4233 Value *Popc = Builder.CreateCall(
4234 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
4235 {Arg->getType()}),
4236 Arg, "ctpop");
4237 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
4238 } else if (IsNVVM) {
4239 if (Name == "h2f") {
4240 Rep =
4241 Builder.CreateCall(Intrinsic::getDeclaration(
4242 F->getParent(), Intrinsic::convert_from_fp16,
4243 {Builder.getFloatTy()}),
4244 CI->getArgOperand(0), "h2f");
4245 } else {
4246 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
4247 if (IID != Intrinsic::not_intrinsic &&
4248 !F->getReturnType()->getScalarType()->isBFloatTy()) {
4249 rename(F);
4250 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
4251 SmallVector<Value *, 2> Args;
4252 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
4253 Value *Arg = CI->getArgOperand(I);
4254 Type *OldType = Arg->getType();
4255 Type *NewType = NewFn->getArg(I)->getType();
4256 Args.push_back((OldType->isIntegerTy() &&
4257 NewType->getScalarType()->isBFloatTy())
4258 ? Builder.CreateBitCast(Arg, NewType)
4259 : Arg);
4260 }
4261 Rep = Builder.CreateCall(NewFn, Args);
4262 if (F->getReturnType()->isIntegerTy())
4263 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
4264 }
4265 }
4266 } else if (IsARM) {
4267 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4268 } else if (IsAMDGCN) {
4269 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4270 } else if (IsDbg) {
4271 // We might have decided we don't want the new format after all between
4272 // first requesting the upgrade and now; skip the conversion if that is
4273 // the case, and check here to see if the intrinsic needs to be upgraded
4274 // normally.
4275 if (!CI->getModule()->IsNewDbgInfoFormat) {
4276 bool NeedsUpgrade =
4277 upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false);
4278 if (!NeedsUpgrade)
4279 return;
4280 FallthroughToDefaultUpgrade = true;
4281 } else {
4282 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4283 }
4284 } else {
4285 llvm_unreachable("Unknown function for CallBase upgrade.");
4286 }
4287
4288 if (!FallthroughToDefaultUpgrade) {
4289 if (Rep)
4290 CI->replaceAllUsesWith(Rep);
4291 CI->eraseFromParent();
4292 return;
4293 }
4294 }
4295
4296 const auto &DefaultCase = [&]() -> void {
4297 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4298 // Handle generic mangling change.
4299 assert(
4300 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4301 "Unknown function for CallBase upgrade and isn't just a name change");
4302 CI->setCalledFunction(NewFn);
4303 return;
4304 }
4305
4306 // This must be an upgrade from a named to a literal struct.
4307 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4308 assert(OldST != NewFn->getReturnType() &&
4309 "Return type must have changed");
4310 assert(OldST->getNumElements() ==
4311 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4312 "Must have same number of elements");
4313
4314 SmallVector<Value *> Args(CI->args());
4315 Value *NewCI = Builder.CreateCall(NewFn, Args);
4316 Value *Res = PoisonValue::get(OldST);
4317 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4318 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4319 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4320 }
4321 CI->replaceAllUsesWith(Res);
4322 CI->eraseFromParent();
4323 return;
4324 }
4325
4326 // We're probably about to produce something invalid. Let the verifier catch
4327 // it instead of dying here.
4328 CI->setCalledOperand(
4329 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4330 return;
4331 };
4332 CallInst *NewCall = nullptr;
4333 switch (NewFn->getIntrinsicID()) {
4334 default: {
4335 DefaultCase();
4336 return;
4337 }
4338 case Intrinsic::arm_neon_vst1:
4339 case Intrinsic::arm_neon_vst2:
4340 case Intrinsic::arm_neon_vst3:
4341 case Intrinsic::arm_neon_vst4:
4342 case Intrinsic::arm_neon_vst2lane:
4343 case Intrinsic::arm_neon_vst3lane:
4344 case Intrinsic::arm_neon_vst4lane: {
4345 SmallVector<Value *, 4> Args(CI->args());
4346 NewCall = Builder.CreateCall(NewFn, Args);
4347 break;
4348 }
4349 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4350 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4351 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4352 LLVMContext &Ctx = F->getParent()->getContext();
4353 SmallVector<Value *, 4> Args(CI->args());
4354 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4355 cast<ConstantInt>(Args[3])->getZExtValue());
4356 NewCall = Builder.CreateCall(NewFn, Args);
4357 break;
4358 }
4359 case Intrinsic::aarch64_sve_ld3_sret:
4360 case Intrinsic::aarch64_sve_ld4_sret:
4361 case Intrinsic::aarch64_sve_ld2_sret: {
4362 StringRef Name = F->getName();
4363 Name = Name.substr(5);
4364 unsigned N = StringSwitch<unsigned>(Name)
4365 .StartsWith("aarch64.sve.ld2", 2)
4366 .StartsWith("aarch64.sve.ld3", 3)
4367 .StartsWith("aarch64.sve.ld4", 4)
4368 .Default(0);
4369 ScalableVectorType *RetTy =
4370 dyn_cast<ScalableVectorType>(F->getReturnType());
4371 unsigned MinElts = RetTy->getMinNumElements() / N;
4372 SmallVector<Value *, 2> Args(CI->args());
4373 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4374 Value *Ret = llvm::PoisonValue::get(RetTy);
4375 for (unsigned I = 0; I < N; I++) {
4376 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4377 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4378 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4379 }
4380 NewCall = dyn_cast<CallInst>(Ret);
4381 break;
4382 }
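// Illustrative sketch (element types are hypothetical): an old call such as
//   <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8(...)
// now returns { <vscale x 16 x i8>, <vscale x 16 x i8> }; the loop above
// stitches the halves back into the wide vector with llvm.vector.insert at
// offsets 0 and MinElts (16 here).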
4383
4384 case Intrinsic::coro_end: {
4385 SmallVector<Value *, 3> Args(CI->args());
4386 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4387 NewCall = Builder.CreateCall(NewFn, Args);
4388 break;
4389 }
4390
4391 case Intrinsic::vector_extract: {
4392 StringRef Name = F->getName();
4393 Name = Name.substr(5); // Strip llvm
4394 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4395 DefaultCase();
4396 return;
4397 }
4398 ScalableVectorType *RetTy =
4399 dyn_cast<ScalableVectorType>(F->getReturnType());
4400 unsigned MinElts = RetTy->getMinNumElements();
4401 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4402 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4403 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4404 break;
4405 }
4406
4407 case Intrinsic::vector_insert: {
4408 StringRef Name = F->getName();
4409 Name = Name.substr(5);
4410 if (!Name.starts_with("aarch64.sve.tuple")) {
4411 DefaultCase();
4412 return;
4413 }
4414 if (Name.starts_with("aarch64.sve.tuple.set")) {
4415 unsigned I = dyn_cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4416 ScalableVectorType *Ty =
4417 dyn_cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4418 Value *NewIdx =
4419 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4420 NewCall = Builder.CreateCall(
4421 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4422 break;
4423 }
4424 if (Name.starts_with("aarch64.sve.tuple.create")) {
4425 unsigned N = StringSwitch<unsigned>(Name)
4426 .StartsWith("aarch64.sve.tuple.create2", 2)
4427 .StartsWith("aarch64.sve.tuple.create3", 3)
4428 .StartsWith("aarch64.sve.tuple.create4", 4)
4429 .Default(0);
4430 assert(N > 1 && "Create is expected to be between 2-4");
4431 ScalableVectorType *RetTy =
4432 dyn_cast<ScalableVectorType>(F->getReturnType());
4433 Value *Ret = llvm::PoisonValue::get(RetTy);
4434 unsigned MinElts = RetTy->getMinNumElements() / N;
4435 for (unsigned I = 0; I < N; I++) {
4436 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4437 Value *V = CI->getArgOperand(I);
4438 Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4439 }
4440 NewCall = dyn_cast<CallInst>(Ret);
4441 }
4442 break;
4443 }
4444
4445 case Intrinsic::arm_neon_bfdot:
4446 case Intrinsic::arm_neon_bfmmla:
4447 case Intrinsic::arm_neon_bfmlalb:
4448 case Intrinsic::arm_neon_bfmlalt:
4449 case Intrinsic::aarch64_neon_bfdot:
4450 case Intrinsic::aarch64_neon_bfmmla:
4451 case Intrinsic::aarch64_neon_bfmlalb:
4452 case Intrinsic::aarch64_neon_bfmlalt: {
4453 SmallVector<Value *, 3> Args;
4454 assert(CI->arg_size() == 3 &&
4455 "Mismatch between function args and call args");
4456 size_t OperandWidth =
4457 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4458 assert((OperandWidth == 64 || OperandWidth == 128) &&
4459 "Unexpected operand width");
4460 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4461 auto Iter = CI->args().begin();
4462 Args.push_back(*Iter++);
4463 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4464 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4465 NewCall = Builder.CreateCall(NewFn, Args);
4466 break;
4467 }
4468
4469 case Intrinsic::bitreverse:
4470 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4471 break;
4472
4473 case Intrinsic::ctlz:
4474 case Intrinsic::cttz:
4475 assert(CI->arg_size() == 1 &&
4476 "Mismatch between function args and call args");
4477 NewCall =
4478 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4479 break;
4480
4481 case Intrinsic::objectsize: {
4482 Value *NullIsUnknownSize =
4483 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4484 Value *Dynamic =
4485 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4486 NewCall = Builder.CreateCall(
4487 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4488 break;
4489 }
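// Illustrative sketch (operands are hypothetical): an old two-argument call
//   call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false)
// becomes the four-argument form, with i1 false supplied for the missing
// nullunknown and dynamic flags.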
4490
4491 case Intrinsic::ctpop:
4492 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4493 break;
4494
4495 case Intrinsic::convert_from_fp16:
4496 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4497 break;
4498
4499 case Intrinsic::dbg_value: {
4500 StringRef Name = F->getName();
4501 Name = Name.substr(5); // Strip llvm.
4502 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4503 if (Name.starts_with("dbg.addr")) {
4504 DIExpression *Expr = cast<DIExpression>(
4505 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4506 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4507 NewCall =
4508 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4509 MetadataAsValue::get(C, Expr)});
4510 break;
4511 }
4512
4513 // Upgrade from the old version that had an extra offset argument.
4514 assert(CI->arg_size() == 4);
4515 // Drop nonzero offsets instead of attempting to upgrade them.
4516 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4517 if (Offset->isZeroValue()) {
4518 NewCall = Builder.CreateCall(
4519 NewFn,
4520 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4521 break;
4522 }
4523 CI->eraseFromParent();
4524 return;
4525 }
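// Illustrative sketch: llvm.dbg.addr(metadata ptr %p, metadata !var,
// metadata !expr) becomes llvm.dbg.value with DW_OP_deref appended to
// !expr, while a four-operand llvm.dbg.value keeps its operands only when
// the offset is constant zero; nonzero offsets are dropped, not upgraded.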
4526
4527 case Intrinsic::ptr_annotation:
4528 // Upgrade from versions that lacked the annotation attribute argument.
4529 if (CI->arg_size() != 4) {
4530 DefaultCase();
4531 return;
4532 }
4533
4534 // Create a new call with an added null annotation attribute argument.
4535 NewCall =
4536 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4537 CI->getArgOperand(2), CI->getArgOperand(3),
4538 Constant::getNullValue(Builder.getPtrTy())});
4539 NewCall->takeName(CI);
4540 CI->replaceAllUsesWith(NewCall);
4541 CI->eraseFromParent();
4542 return;
4543
4544 case Intrinsic::var_annotation:
4545 // Upgrade from versions that lacked the annotation attribute argument.
4546 if (CI->arg_size() != 4) {
4547 DefaultCase();
4548 return;
4549 }
4550 // Create a new call with an added null annotation attribute argument.
4551 NewCall =
4552 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4553 CI->getArgOperand(2), CI->getArgOperand(3),
4554 Constant::getNullValue(Builder.getPtrTy())});
4555 NewCall->takeName(CI);
4556 CI->replaceAllUsesWith(NewCall);
4557 CI->eraseFromParent();
4558 return;
4559
4560 case Intrinsic::riscv_aes32dsi:
4561 case Intrinsic::riscv_aes32dsmi:
4562 case Intrinsic::riscv_aes32esi:
4563 case Intrinsic::riscv_aes32esmi:
4564 case Intrinsic::riscv_sm4ks:
4565 case Intrinsic::riscv_sm4ed: {
4566 // The last argument to these intrinsics used to be i8 and changed to i32.
4567 // The type overload for sm4ks and sm4ed was removed.
4568 Value *Arg2 = CI->getArgOperand(2);
4569 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4570 return;
4571
4572 Value *Arg0 = CI->getArgOperand(0);
4573 Value *Arg1 = CI->getArgOperand(1);
4574 if (CI->getType()->isIntegerTy(64)) {
4575 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4576 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4577 }
4578
4579 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4580 cast<ConstantInt>(Arg2)->getZExtValue());
4581
4582 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4583 Value *Res = NewCall;
4584 if (Res->getType() != CI->getType())
4585 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4586 NewCall->takeName(CI);
4587 CI->replaceAllUsesWith(Res);
4588 CI->eraseFromParent();
4589 return;
4590 }
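// Illustrative sketch (operands are hypothetical): an old call such as
//   call i64 @llvm.riscv.sm4ks.i64(i64 %a, i64 %b, i8 3)
// is rewritten against the unsuffixed i32 declaration: the data operands
// are truncated, the immediate is widened to i32, and the i32 result is
// sign-extended back to i64.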
4591 case Intrinsic::riscv_sha256sig0:
4592 case Intrinsic::riscv_sha256sig1:
4593 case Intrinsic::riscv_sha256sum0:
4594 case Intrinsic::riscv_sha256sum1:
4595 case Intrinsic::riscv_sm3p0:
4596 case Intrinsic::riscv_sm3p1: {
4597 // These intrinsics used to have an i64 type overload; it was removed, so
4598 // an old i64 call is truncated to i32 and its result sign-extended back.
4599 if (!CI->getType()->isIntegerTy(64))
4600 return;
4601
4602 Value *Arg =
4603 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4604
4605 NewCall = Builder.CreateCall(NewFn, Arg);
4606 Value *Res =
4607 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4608 NewCall->takeName(CI);
4609 CI->replaceAllUsesWith(Res);
4610 CI->eraseFromParent();
4611 return;
4612 }
4613
4614 case Intrinsic::x86_xop_vfrcz_ss:
4615 case Intrinsic::x86_xop_vfrcz_sd:
4616 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4617 break;
4618
4619 case Intrinsic::x86_xop_vpermil2pd:
4620 case Intrinsic::x86_xop_vpermil2ps:
4621 case Intrinsic::x86_xop_vpermil2pd_256:
4622 case Intrinsic::x86_xop_vpermil2ps_256: {
4623 SmallVector<Value *, 4> Args(CI->args());
4624 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4625 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4626 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4627 NewCall = Builder.CreateCall(NewFn, Args);
4628 break;
4629 }
4630
4631 case Intrinsic::x86_sse41_ptestc:
4632 case Intrinsic::x86_sse41_ptestz:
4633 case Intrinsic::x86_sse41_ptestnzc: {
4634 // The arguments for these intrinsics used to be v4f32, and changed
4635 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4636 // So, the only thing required is a bitcast for both arguments.
4637 // First, check the arguments have the old type.
4638 Value *Arg0 = CI->getArgOperand(0);
4639 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4640 return;
4641
4642 // Old intrinsic, add bitcasts
4643 Value *Arg1 = CI->getArgOperand(1);
4644
4645 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4646
4647 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4648 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4649
4650 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4651 break;
4652 }
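// Illustrative sketch: the old form
//   call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %b)
// is bit-identical to the new <2 x i64> form, so both operands are simply
// bitcast before calling the upgraded declaration.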
4653
4654 case Intrinsic::x86_rdtscp: {
4655 // This used to take 1 argument. If we have no arguments, it is already
4656 // upgraded.
4657 if (CI->getNumOperands() == 0)
4658 return;
4659
4660 NewCall = Builder.CreateCall(NewFn);
4661 // Extract the second result and store it.
4662 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4663 // Cast the pointer to the right type.
4664 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
4665 llvm::PointerType::getUnqual(Data->getType()));
4666 Builder.CreateAlignedStore(Data, Ptr, Align(1));
4667 // Replace the original call result with the first result of the new call.
4668 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4669
4670 NewCall->takeName(CI);
4671 CI->replaceAllUsesWith(TSC);
4672 CI->eraseFromParent();
4673 return;
4674 }
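// Illustrative sketch: the old signature was i64 @llvm.x86.rdtscp(ptr);
// the new one returns { i64, i32 } and takes no pointer. The i32 half is
// stored through the old pointer argument with align 1, and the i64 TSC
// half replaces the original call's result.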
4675
4676 case Intrinsic::x86_sse41_insertps:
4677 case Intrinsic::x86_sse41_dppd:
4678 case Intrinsic::x86_sse41_dpps:
4679 case Intrinsic::x86_sse41_mpsadbw:
4680 case Intrinsic::x86_avx_dp_ps_256:
4681 case Intrinsic::x86_avx2_mpsadbw: {
4682 // Need to truncate the last argument from i32 to i8 -- this argument models
4683 // an inherently 8-bit immediate operand to these x86 instructions.
4684 SmallVector<Value *, 4> Args(CI->args());
4685
4686 // Replace the last argument with a trunc.
4687 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4688 NewCall = Builder.CreateCall(NewFn, Args);
4689 break;
4690 }
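// Illustrative sketch (immediate is hypothetical):
//   call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %b, i32 16)
// keeps its operands except that the final immediate is truncated to i8,
// matching the instruction's 8-bit immediate field.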
4691
4692 case Intrinsic::x86_avx512_mask_cmp_pd_128:
4693 case Intrinsic::x86_avx512_mask_cmp_pd_256:
4694 case Intrinsic::x86_avx512_mask_cmp_pd_512:
4695 case Intrinsic::x86_avx512_mask_cmp_ps_128:
4696 case Intrinsic::x86_avx512_mask_cmp_ps_256:
4697 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4698 SmallVector<Value *, 4> Args(CI->args());
4699 unsigned NumElts =
4700 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4701 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4702
4703 NewCall = Builder.CreateCall(NewFn, Args);
4704 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4705
4706 NewCall->takeName(CI);
4707 CI->replaceAllUsesWith(Res);
4708 CI->eraseFromParent();
4709 return;
4710 }
4711
4712 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4713 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4714 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4715 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4716 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4717 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4718 SmallVector<Value *, 4> Args(CI->args());
4719 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4720 if (NewFn->getIntrinsicID() ==
4721 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4722 Args[1] = Builder.CreateBitCast(
4723 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4724
4725 NewCall = Builder.CreateCall(NewFn, Args);
4726 Value *Res = Builder.CreateBitCast(
4727 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4728
4729 NewCall->takeName(CI);
4730 CI->replaceAllUsesWith(Res);
4731 CI->eraseFromParent();
4732 return;
4733 }
4734 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4735 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4736 case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
4737 SmallVector<Value *, 4> Args(CI->args());
4738 unsigned NumElts =
4739 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4740 Args[1] = Builder.CreateBitCast(
4741 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4742 Args[2] = Builder.CreateBitCast(
4743 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4744
4745 NewCall = Builder.CreateCall(NewFn, Args);
4746 break;
4747 }
4748
4749 case Intrinsic::thread_pointer: {
4750 NewCall = Builder.CreateCall(NewFn, {});
4751 break;
4752 }
4753
4754 case Intrinsic::memcpy:
4755 case Intrinsic::memmove:
4756 case Intrinsic::memset: {
4757 // We have to make sure that the call signature is what we're expecting.
4758 // We only want to change the old signatures by removing the alignment arg:
4759 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i32, i1)
4760 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i1)
4761 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4762 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
4763 // Note: i8*'s in the above can be any pointer type
4764 if (CI->arg_size() != 5) {
4765 DefaultCase();
4766 return;
4767 }
4768 // Remove alignment argument (3), and add alignment attributes to the
4769 // dest/src pointers.
4770 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4771 CI->getArgOperand(2), CI->getArgOperand(4)};
4772 NewCall = Builder.CreateCall(NewFn, Args);
4773 AttributeList OldAttrs = CI->getAttributes();
4774 AttributeList NewAttrs = AttributeList::get(
4775 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4776 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4777 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4778 NewCall->setAttributes(NewAttrs);
4779 auto *MemCI = cast<MemIntrinsic>(NewCall);
4780 // All mem intrinsics support dest alignment.
4781 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4782 MemCI->setDestAlignment(Align->getMaybeAlignValue());
4783 // Memcpy/Memmove also support source alignment.
4784 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4785 MTI->setSourceAlignment(Align->getMaybeAlignValue());
4786 break;
4787 }
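// Illustrative sketch (operands are hypothetical): the old five-argument
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 8, i1 false)
// becomes
//   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %d, ptr align 8 %s, i64 %n, i1 false)
// with the alignment operand re-expressed as align attributes.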
4788 }
4789 assert(NewCall && "Should have either set this variable or returned through "
4790 "the default case");
4791 NewCall->takeName(CI);
4792 CI->replaceAllUsesWith(NewCall);
4793 CI->eraseFromParent();
4794 }
4795
4796 void llvm::UpgradeCallsToIntrinsic(Function *F) {
4797 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4798
4799 // Check if this function should be upgraded and get the replacement function
4800 // if there is one.
4801 Function *NewFn;
4802 if (UpgradeIntrinsicFunction(F, NewFn)) {
4803 // Replace all users of the old function with the new function or new
4804 // instructions. This is not a range loop because the call is deleted.
4805 for (User *U : make_early_inc_range(F->users()))
4806 if (CallBase *CB = dyn_cast<CallBase>(U))
4807 UpgradeIntrinsicCall(CB, NewFn);
4808
4809 // Remove old function, no longer used, from the module.
4810 F->eraseFromParent();
4811 }
4812 }
4813
4814 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4815 const unsigned NumOperands = MD.getNumOperands();
4816 if (NumOperands == 0)
4817 return &MD; // Invalid, punt to a verifier error.
4818
4819 // Check if the tag uses struct-path aware TBAA format.
4820 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4821 return &MD;
4822
4823 auto &Context = MD.getContext();
4824 if (NumOperands == 3) {
4825 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4826 MDNode *ScalarType = MDNode::get(Context, Elts);
4827 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4828 Metadata *Elts2[] = {ScalarType, ScalarType,
4829 ConstantAsMetadata::get(
4830 Constant::getNullValue(Type::getInt64Ty(Context))),
4831 MD.getOperand(2)};
4832 return MDNode::get(Context, Elts2);
4833 }
4834 // Create a MDNode <MD, MD, offset 0>
4835 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4836 Type::getInt64Ty(Context)))};
4837 return MDNode::get(Context, Elts);
4838 }
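// Illustrative sketch (tag names are hypothetical): an old scalar tag
//   !0 = !{!"int", !1}
// is rewrapped in the struct-path aware shape <access, base, offset>:
//   !2 = !{!0, !0, i64 0}
// reusing the old node as both the base and the access type.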
4839
4840 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4841 Instruction *&Temp) {
4842 if (Opc != Instruction::BitCast)
4843 return nullptr;
4844
4845 Temp = nullptr;
4846 Type *SrcTy = V->getType();
4847 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4848 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4849 LLVMContext &Context = V->getContext();
4850
4851 // We have no information about target data layout, so we assume that
4852 // the maximum pointer size is 64 bits.
4853 Type *MidTy = Type::getInt64Ty(Context);
4854 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4855
4856 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4857 }
4858
4859 return nullptr;
4860}
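// Illustrative sketch: a no-longer-valid bitcast between pointers in
// different address spaces, e.g. ptr addrspace(1) to ptr, is split into a
// ptrtoint to i64 followed by an inttoptr; i64 is chosen because no data
// layout is available to determine the real pointer width.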
4861
4862 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4863 if (Opc != Instruction::BitCast)
4864 return nullptr;
4865
4866 Type *SrcTy = C->getType();
4867 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4868 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4869 LLVMContext &Context = C->getContext();
4870
4871 // We have no information about target data layout, so we assume that
4872 // the maximum pointer size is 64 bits.
4873 Type *MidTy = Type::getInt64Ty(Context);
4874
4875 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4876 DestTy);
4877 }
4878
4879 return nullptr;
4880}
4881
4882 /// Check the debug info version number; if it is out-dated, drop the debug
4883 /// info. Return true if the module is modified.
4884 bool llvm::UpgradeDebugInfo(Module &M) {
4885 if (DisableAutoUpgradeDebugInfo)
4886 return false;
4887
4888 unsigned Version = getDebugMetadataVersionFromModule(M);
4889 if (Version == DEBUG_METADATA_VERSION) {
4890 bool BrokenDebugInfo = false;
4891 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4892 report_fatal_error("Broken module found, compilation aborted!");
4893 if (!BrokenDebugInfo)
4894 // Everything is ok.
4895 return false;
4896 else {
4897 // Diagnose malformed debug info.
4898 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4899 M.getContext().diagnose(Diag);
4900 }
4901 }
4902 bool Modified = StripDebugInfo(M);
4903 if (Modified && Version != DEBUG_METADATA_VERSION) {
4904 // Diagnose a version mismatch.
4905 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4906 M.getContext().diagnose(DiagVersion);
4907 }
4908 return Modified;
4909}
4910
4911 /// This checks for the objc retain/release marker which should be upgraded.
4912 /// It returns true if the module is modified.
4913 static bool upgradeRetainReleaseMarker(Module &M) {
4914 bool Changed = false;
4915 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4916 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4917 if (ModRetainReleaseMarker) {
4918 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4919 if (Op) {
4920 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4921 if (ID) {
4922 SmallVector<StringRef, 4> ValueComp;
4923 ID->getString().split(ValueComp, "#");
4924 if (ValueComp.size() == 2) {
4925 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4926 ID = MDString::get(M.getContext(), NewValue);
4927 }
4928 M.addModuleFlag(Module::Error, MarkerKey, ID);
4929 M.eraseNamedMetadata(ModRetainReleaseMarker);
4930 Changed = true;
4931 }
4932 }
4933 }
4934 return Changed;
4935}
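// Illustrative sketch (marker text is hypothetical): a marker recorded in
// named metadata as "mov fp, fp#marker" is re-encoded as "mov fp, fp;marker"
// and reinstalled as a module flag under the same key.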
4936
4937 void llvm::UpgradeARCRuntime(Module &M) {
4938 // This lambda converts normal function calls to ARC runtime functions to
4939 // intrinsic calls.
4940 auto UpgradeToIntrinsic = [&](const char *OldFunc,
4941 llvm::Intrinsic::ID IntrinsicFunc) {
4942 Function *Fn = M.getFunction(OldFunc);
4943
4944 if (!Fn)
4945 return;
4946
4947 Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4948
4949 for (User *U : make_early_inc_range(Fn->users())) {
4950 CallInst *CI = dyn_cast<CallInst>(U);
4951 if (!CI || CI->getCalledFunction() != Fn)
4952 continue;
4953
4954 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4955 FunctionType *NewFuncTy = NewFn->getFunctionType();
4956 SmallVector<Value *, 2> Args;
4957
4958 // Don't upgrade the intrinsic if it's not valid to bitcast the return
4959 // value to the return type of the old function.
4960 if (NewFuncTy->getReturnType() != CI->getType() &&
4961 !CastInst::castIsValid(Instruction::BitCast, CI,
4962 NewFuncTy->getReturnType()))
4963 continue;
4964
4965 bool InvalidCast = false;
4966
4967 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
4968 Value *Arg = CI->getArgOperand(I);
4969
4970 // Bitcast argument to the parameter type of the new function if it's
4971 // not a variadic argument.
4972 if (I < NewFuncTy->getNumParams()) {
4973 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4974 // to the parameter type of the new function.
4975 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4976 NewFuncTy->getParamType(I))) {
4977 InvalidCast = true;
4978 break;
4979 }
4980 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4981 }
4982 Args.push_back(Arg);
4983 }
4984
4985 if (InvalidCast)
4986 continue;
4987
4988 // Create a call instruction that calls the new function.
4989 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4990 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4991 NewCall->takeName(CI);
4992
4993 // Bitcast the return value back to the type of the old call.
4994 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4995
4996 if (!CI->use_empty())
4997 CI->replaceAllUsesWith(NewRetVal);
4998 CI->eraseFromParent();
4999 }
5000
5001 if (Fn->use_empty())
5002 Fn->eraseFromParent();
5003 };
5004
5005 // Unconditionally convert a call to "clang.arc.use" to a call to
5006 // "llvm.objc.clang.arc.use".
5007 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5008
5009 // Upgrade the retain release marker. If there is no need to upgrade
5010 // the marker, that means either the module is already new enough to contain
5011 // new intrinsics or it is not ARC. There is no need to upgrade the runtime calls.
5012 if (!upgradeRetainReleaseMarker(M))
5013 return;
5014
5015 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5016 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5017 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5018 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5019 {"objc_autoreleaseReturnValue",
5020 llvm::Intrinsic::objc_autoreleaseReturnValue},
5021 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5022 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5023 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5024 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5025 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5026 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5027 {"objc_release", llvm::Intrinsic::objc_release},
5028 {"objc_retain", llvm::Intrinsic::objc_retain},
5029 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5030 {"objc_retainAutoreleaseReturnValue",
5031 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5032 {"objc_retainAutoreleasedReturnValue",
5033 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5034 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5035 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5036 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5037 {"objc_unsafeClaimAutoreleasedReturnValue",
5038 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5039 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5040 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5041 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5042 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5043 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5044 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5045 {"objc_arc_annotation_topdown_bbstart",
5046 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5047 {"objc_arc_annotation_topdown_bbend",
5048 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5049 {"objc_arc_annotation_bottomup_bbstart",
5050 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5051 {"objc_arc_annotation_bottomup_bbend",
5052 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5053
5054 for (auto &I : RuntimeFuncs)
5055 UpgradeToIntrinsic(I.first, I.second);
5056 }
5057
5058 bool llvm::UpgradeModuleFlags(Module &M) {
5059 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5060 if (!ModFlags)
5061 return false;
5062
5063 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5064 bool HasSwiftVersionFlag = false;
5065 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5066 uint32_t SwiftABIVersion;
5067 auto Int8Ty = Type::getInt8Ty(M.getContext());
5068 auto Int32Ty = Type::getInt32Ty(M.getContext());
5069
5070 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5071 MDNode *Op = ModFlags->getOperand(I);
5072 if (Op->getNumOperands() != 3)
5073 continue;
5074 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5075 if (!ID)
5076 continue;
5077 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5078 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5079 Type::getInt32Ty(M.getContext()), B)),
5080 MDString::get(M.getContext(), ID->getString()),
5081 Op->getOperand(2)};
5082 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5083 Changed = true;
5084 };
5085
5086 if (ID->getString() == "Objective-C Image Info Version")
5087 HasObjCFlag = true;
5088 if (ID->getString() == "Objective-C Class Properties")
5089 HasClassProperties = true;
5090 // Upgrade PIC from Error/Max to Min.
5091 if (ID->getString() == "PIC Level") {
5092 if (auto *Behavior =
5093 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5094 uint64_t V = Behavior->getLimitedValue();
5095 if (V == Module::Error || V == Module::Max)
5096 SetBehavior(Module::Min);
5097 }
5098 }
5099 // Upgrade "PIE Level" from Error to Max.
5100 if (ID->getString() == "PIE Level")
5101 if (auto *Behavior =
5102 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5103 if (Behavior->getLimitedValue() == Module::Error)
5104 SetBehavior(Module::Max);
5105
5106 // Upgrade branch protection and return address signing module flags. The
5107 // module flag behavior for these fields was Error and is now Min.
5108 if (ID->getString() == "branch-target-enforcement" ||
5109 ID->getString().starts_with("sign-return-address")) {
5110 if (auto *Behavior =
5111 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5112 if (Behavior->getLimitedValue() == Module::Error) {
5113 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5114 Metadata *Ops[3] = {
5115 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5116 Op->getOperand(1), Op->getOperand(2)};
5117 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5118 Changed = true;
5119 }
5120 }
5121 }
5122
5123 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
5124 // section name so that llvm-lto will not complain about mismatched
5125 // module flags that are functionally the same.
5126 if (ID->getString() == "Objective-C Image Info Section") {
5127 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5128 SmallVector<StringRef, 4> ValueComp;
5129 Value->getString().split(ValueComp, " ");
5130 if (ValueComp.size() != 1) {
5131 std::string NewValue;
5132 for (auto &S : ValueComp)
5133 NewValue += S.str();
5134 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5135 MDString::get(M.getContext(), NewValue)};
5136 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5137 Changed = true;
5138 }
5139 }
5140 }
5141
5142 // The IR upgrader turns the i32 "Objective-C Garbage Collection" flag into an i8 value.
5143 // If the higher bits are set, it also adds new module flags for the Swift info.
5144 if (ID->getString() == "Objective-C Garbage Collection") {
5145 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5146 if (Md) {
5147 assert(Md->getValue() && "Expected non-empty metadata");
5148 auto Type = Md->getValue()->getType();
5149 if (Type == Int8Ty)
5150 continue;
5151 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5152 if ((Val & 0xff) != Val) {
5153 HasSwiftVersionFlag = true;
5154 SwiftABIVersion = (Val & 0xff00) >> 8;
5155 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5156 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5157 }
5158 Metadata *Ops[3] = {
5159 Op->getOperand(0),
5160 Op->getOperand(1),
5161 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5162 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5163 Changed = true;
5164 }
5165 }
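// Illustrative sketch: a packed value of 0x01020300 yields Swift major
// version 1 (bits 31:24), Swift minor version 2 (bits 23:16), Swift ABI
// version 3 (bits 15:8), and GC byte 0x00 (bits 7:0); only the GC byte
// stays in the downgraded i8 flag.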
5166
5167 if (ID->getString() == "amdgpu_code_object_version") {
5168 Metadata *Ops[3] = {
5169 Op->getOperand(0),
5170 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5171 Op->getOperand(2)};
5172 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5173 Changed = true;
5174 }
5175 }
5176
5177 // "Objective-C Class Properties" is recently added for Objective-C. We
5178 // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
5179 // flag of value 0, so we can correctly downgrade this flag when trying to
5180 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5181 // this module flag.
5182 if (HasObjCFlag && !HasClassProperties) {
5183 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5184 (uint32_t)0);
5185 Changed = true;
5186 }
5187
5188 if (HasSwiftVersionFlag) {
5189 M.addModuleFlag(Module::Error, "Swift ABI Version",
5190 SwiftABIVersion);
5191 M.addModuleFlag(Module::Error, "Swift Major Version",
5192 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5193 M.addModuleFlag(Module::Error, "Swift Minor Version",
5194 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5195 Changed = true;
5196 }
5197
5198 return Changed;
5199 }
5200
5201 void llvm::UpgradeSectionAttributes(Module &M) {
5202 auto TrimSpaces = [](StringRef Section) -> std::string {
5203 SmallVector<StringRef, 5> Components;
5204 Section.split(Components, ',');
5205
5206 SmallString<32> Buffer;
5207 raw_svector_ostream OS(Buffer);
5208
5209 for (auto Component : Components)
5210 OS << ',' << Component.trim();
5211
5212 return std::string(OS.str().substr(1));
5213 };
5214
5215 for (auto &GV : M.globals()) {
5216 if (!GV.hasSection())
5217 continue;
5218
5219 StringRef Section = GV.getSection();
5220
5221 if (!Section.starts_with("__DATA, __objc_catlist"))
5222 continue;
5223
5224 // __DATA, __objc_catlist, regular, no_dead_strip
5225 // __DATA,__objc_catlist,regular,no_dead_strip
5226 GV.setSection(TrimSpaces(Section));
5227 }
5228}
5229
5230namespace {
5231// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5232// callsites within a function that did not also have the strictfp attribute.
5233// Since 10.0, if strict FP semantics are needed within a function, the
5234// function must have the strictfp attribute and all calls within the function
5235// must also have the strictfp attribute. This latter restriction is
5236// necessary to prevent unwanted libcall simplification when a function is
5237// being cloned (such as for inlining).
5238//
5239// The "dangling" strictfp attribute usage was only used to prevent constant
5240// folding and other libcall simplification. The nobuiltin attribute on the
5241// callsite has the same effect.
5242struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5243 StrictFPUpgradeVisitor() = default;
5244
5245 void visitCallBase(CallBase &Call) {
5246 if (!Call.isStrictFP())
5247 return;
5248 if (isa<ConstrainedFPIntrinsic>(&Call))
5249 return;
5250 // If we get here, the caller doesn't have the strictfp attribute
5251 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5252 Call.removeFnAttr(Attribute::StrictFP);
5253 Call.addFnAttr(Attribute::NoBuiltin);
5254 }
5255};
5256} // namespace
5257
5258 void llvm::UpgradeFunctionAttributes(Function &F) {
5259 // If a function definition doesn't have the strictfp attribute,
5260 // convert any callsite strictfp attributes to nobuiltin.
5261 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5262 StrictFPUpgradeVisitor SFPV;
5263 SFPV.visit(F);
5264 }
5265
5266 // Remove all incompatible attributes from the function.
5267 F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
5268 for (auto &Arg : F.args())
5269 Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
5270
5271 // Older versions of LLVM treated an "implicit-section-name" attribute
5272 // similarly to directly setting the section on a Function.
5273 if (Attribute A = F.getFnAttribute("implicit-section-name");
5274 A.isValid() && A.isStringAttribute()) {
5275 F.setSection(A.getValueAsString());
5276 F.removeFnAttr("implicit-section-name");
5277 }
5278}
5279
5280static bool isOldLoopArgument(Metadata *MD) {
5281 auto *T = dyn_cast_or_null<MDTuple>(MD);
5282 if (!T)
5283 return false;
5284 if (T->getNumOperands() < 1)
5285 return false;
5286 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5287 if (!S)
5288 return false;
5289 return S->getString().starts_with("llvm.vectorizer.");
5290}
5291
5292 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5293 StringRef OldPrefix = "llvm.vectorizer.";
5294 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5295
5296 if (OldTag == "llvm.vectorizer.unroll")
5297 return MDString::get(C, "llvm.loop.interleave.count");
5298
5299 return MDString::get(
5300 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5301 .str());
5302}
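// Illustrative sketch: "llvm.vectorizer.width" becomes
// "llvm.loop.vectorize.width", while the special-cased
// "llvm.vectorizer.unroll" maps to "llvm.loop.interleave.count".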
5303
5304 static Metadata *upgradeLoopArgument(Metadata *MD) {
5305 auto *T = dyn_cast_or_null<MDTuple>(MD);
5306 if (!T)
5307 return MD;
5308 if (T->getNumOperands() < 1)
5309 return MD;
5310 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5311 if (!OldTag)
5312 return MD;
5313 if (!OldTag->getString().starts_with("llvm.vectorizer."))
5314 return MD;
5315
5316 // This has an old tag. Upgrade it.
5317 SmallVector<Metadata *, 8> Ops;
5318 Ops.reserve(T->getNumOperands());
5319 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5320 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5321 Ops.push_back(T->getOperand(I));
5322
5323 return MDTuple::get(T->getContext(), Ops);
5324}
5325
5326 MDNode *llvm::upgradeInstructionLoopAttachment(Instruction &I, MDNode &N) {
5327 auto *T = dyn_cast<MDTuple>(&N);
5328 if (!T)
5329 return &N;
5330
5331 if (none_of(T->operands(), isOldLoopArgument))
5332 return &N;
5333
5334 SmallVector<Metadata *, 8> Ops;
5335 Ops.reserve(T->getNumOperands());
5336 for (Metadata *MD : T->operands())
5337 Ops.push_back(upgradeLoopArgument(MD));
5338
5339 return MDTuple::get(T->getContext(), Ops);
5340}
5341
5342 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5343 Triple T(TT);
5344 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
5345 // the address space of globals to 1. This does not apply to SPIRV Logical.
5346 if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5347 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5348 !DL.contains("-G") && !DL.starts_with("G")) {
5349 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5350 }
5351
5352 if (T.isRISCV64()) {
5353 // Make i32 a native type for 64-bit RISC-V.
5354 auto I = DL.find("-n64-");
5355 if (I != StringRef::npos)
5356 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5357 return DL.str();
5358 }
5359
5360 std::string Res = DL.str();
5361 // AMDGCN data layout upgrades.
5362 if (T.isAMDGCN()) {
5363 // Define address spaces for constants.
5364 if (!DL.contains("-G") && !DL.starts_with("G"))
5365 Res.append(Res.empty() ? "G1" : "-G1");
5366
5367 // Add missing non-integral declarations.
5368 // This goes before adding new address spaces to prevent incoherent string
5369 // values.
5370 if (!DL.contains("-ni") && !DL.starts_with("ni"))
5371 Res.append("-ni:7:8:9");
5372 // Update ni:7 to ni:7:8:9.
5373 if (DL.ends_with("ni:7"))
5374 Res.append(":8:9");
5375 if (DL.ends_with("ni:7:8"))
5376 Res.append(":9");
5377
5378 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5379 // resources). An empty data layout has already been upgraded to G1 by now.
5380 if (!DL.contains("-p7") && !DL.starts_with("p7"))
5381 Res.append("-p7:160:256:256:32");
5382 if (!DL.contains("-p8") && !DL.starts_with("p8"))
5383 Res.append("-p8:128:128");
5384 if (!DL.contains("-p9") && !DL.starts_with("p9"))
5385 Res.append("-p9:192:256:256:32");
5386
5387 return Res;
5388 }
5389
5390 if (!T.isX86())
5391 return Res;
5392
5393 // If the datalayout matches the expected format, add pointer size address
5394 // spaces to the datalayout.
5395 std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
5396 if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) {
5397 SmallVector<StringRef, 4> Groups;
5398 Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
5399 if (R.match(Res, &Groups))
5400 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5401 }
5402
5403 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5404 // for i128 operations prior to this being reflected in the data layout, and
5405 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5406 // boundaries, so although this is a breaking change, the upgrade is expected
5407 // to fix more IR than it breaks.
5408 // Intel MCU is an exception and uses 4-byte-alignment.
5409 if (!T.isOSIAMCU()) {
5410 std::string I128 = "-i128:128";
5411 if (StringRef Ref = Res; !Ref.contains(I128)) {
5412 SmallVector<StringRef, 4> Groups;
5413 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5414 if (R.match(Res, &Groups))
5415 Res = (Groups[1] + I128 + Groups[3]).str();
5416 }
5417 }
5418
5419 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5420 // Raising the alignment is safe because Clang did not produce f80 values in
5421 // the MSVC environment before this upgrade was added.
5422 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5423 StringRef Ref = Res;
5424 auto I = Ref.find("-f80:32-");
5425 if (I != StringRef::npos)
5426 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5427 }
5428
5429 return Res;
5430}
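// Illustrative sketch (a hypothetical x86-64 layout): starting from
//   "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
// the first rewrite inserts "-p270:32:32-p271:32:32-p272:64:64" after the
// mangling group and the second inserts "-i128:128", giving i128 the
// 16-byte alignment described above.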
5431
5432 void llvm::UpgradeAttributes(AttrBuilder &B) {
5433 StringRef FramePointer;
5434 Attribute A = B.getAttribute("no-frame-pointer-elim");
5435 if (A.isValid()) {
5436 // The value can be "true" or "false".
5437 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5438 B.removeAttribute("no-frame-pointer-elim");
5439 }
5440 if (B.contains("no-frame-pointer-elim-non-leaf")) {
5441 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5442 if (FramePointer != "all")
5443 FramePointer = "non-leaf";
5444 B.removeAttribute("no-frame-pointer-elim-non-leaf");
5445 }
5446 if (!FramePointer.empty())
5447 B.addAttribute("frame-pointer", FramePointer);
5448
5449 A = B.getAttribute("null-pointer-is-valid");
5450 if (A.isValid()) {
5451 // The value can be "true" or "false".
5452 bool NullPointerIsValid = A.getValueAsString() == "true";
5453 B.removeAttribute("null-pointer-is-valid");
5454 if (NullPointerIsValid)
5455 B.addAttribute(Attribute::NullPointerIsValid);
5456 }
5457}
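// Illustrative sketch: "no-frame-pointer-elim"="true" becomes
// "frame-pointer"="all"; "no-frame-pointer-elim-non-leaf" (without the
// former set to true) becomes "frame-pointer"="non-leaf"; and
// "null-pointer-is-valid"="true" becomes the NullPointerIsValid enum
// attribute.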
5458
5459void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5460 // clang.arc.attachedcall bundles are now required to have an operand.
5461 // If they don't, it's okay to drop them entirely: when there is an operand,
5462 // the "attachedcall" is meaningful and required, but without an operand,
5463 // it's just a marker NOP. Dropping it merely prevents an optimization.
5464 erase_if(Bundles, [&](OperandBundleDef &OBD) {
5465 return OBD.getTag() == "clang.arc.attachedcall" &&
5466 OBD.inputs().empty();
5467 });
5468}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
amdgpu AMDGPU Register Bank Select
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:88
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:72
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:99
static MDType * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
Definition: AutoUpgrade.cpp:52
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:56
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
@ Default
Definition: DwarfDebug.cpp:87
This file contains constants used for implementing Dwarf debug support.
uint64_t Addr
std::string Name
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define R2(n)
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
uint64_t High
IntegerType * Int32Ty
LLVMContext & Context
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Class to represent array types.
Definition: DerivedTypes.h:371
Type * getElementType() const
Definition: DerivedTypes.h:384
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
Definition: Instructions.h:881
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:760
@ FAdd
*p = old + v
Definition: Instructions.h:785
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:800
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:804
AttributeSet getFnAttrs() const
The function attributes are returned.
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1494
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1742
Value * getCalledOperand() const
Definition: InstrTypes.h:1735
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1823
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1687
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1600
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1678
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1778
unsigned arg_size() const
Definition: InstrTypes.h:1685
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1819
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1781
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1663
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:528
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2126
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:2072
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2112
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:205
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1356
static ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
Definition: Constants.cpp:1499
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
DWARF expression.
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
This class represents an Operation in the Expression.
Records a position in IR for a source label (DILabel).
Base class for non-instruction debug metadata records that have positions within IR.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
Class to represent function types.
Definition: DerivedTypes.h:103
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
Type * getReturnType() const
Definition: DerivedTypes.h:124
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:163
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:201
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:231
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Function.cpp:403
size_t arg_size() const
Definition: Function.h:847
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:206
Argument * getArg(unsigned i) const
Definition: Function.h:832
LinkageTypes getLinkage() const
Definition: GlobalValue.h:546
Type * getValueType() const
Definition: GlobalValue.h:296
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:461
Value * CreateFSub(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1560
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2472
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:511
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2523
Value * CreateFDiv(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1614
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Definition: IRBuilder.h:1045
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2094
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2460
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:539
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1807
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1533
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2170
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1214
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:578
Value * CreateICmpSGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2269
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:526
ConstantInt * getInt8(uint8_t C)
Get a constant 8-bit value.
Definition: IRBuilder.h:476
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2081
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Definition: IRBuilder.h:521
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1721
Value * CreateICmpSLE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2277
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2241
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2127
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1790
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1416
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2494
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1475
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:598
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:471
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
Definition: IRBuilder.h:2549
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1854
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1497
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:569
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2253
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2196
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1826
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1456
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2110
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1519
Value * CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2261
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2351
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1587
Value * CreateFNeg(Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1730
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:516
Type * getBFloatTy()
Fetch the type representing a 16-bit brain floating point value.
Definition: IRBuilder.h:549
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
Definition: IRBuilder.h:2666
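A minimal sketch of the IRBuilder pattern, assuming an existing BasicBlock *BB and two i32 values A and B:
  IRBuilder<> Builder(BB->getContext());
  Builder.SetInsertPoint(BB);            // new instructions append to the end of BB
  Value *Sum = Builder.CreateAdd(A, B, "sum");
  Value *Wide = Builder.CreateZExt(Sum, Builder.getInt64Ty(), "sum.wide");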
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitCallBase(CallBase &I)
Definition: InstVisitor.h:267
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
Definition: Instruction.cpp:82
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1636
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:184
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1541
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
LLVMContext & getContext() const
Definition: Metadata.h:1231
A single uniqued string.
Definition: Metadata.h:720
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:600
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1498
Metadata wrapper in the Value hierarchy.
Definition: Metadata.h:176
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:103
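A hedged sketch of how MDString, MDTuple, and MetadataAsValue compose (Ctx is an assumed LLVMContext; the string is illustrative):
  MDString *Str = MDString::get(Ctx, "llvm.loop.unroll.disable");
  MDNode *Node = MDTuple::get(Ctx, {Str});
  // Wrap the node so it can be passed where a Value is expected,
  // e.g. as a metadata argument to an intrinsic call.
  Value *Wrapped = MetadataAsValue::get(Ctx, Node);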
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition: Module.h:115
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition: Module.h:136
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition: Module.h:118
@ Min
Takes the min of the two values, which are required to be integers.
Definition: Module.h:150
@ Max
Takes the max of the two values, which are required to be integers.
Definition: Module.h:147
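A minimal sketch of attaching flags with these behaviors via Module::addModuleFlag (M is an assumed Module; the flag names are illustrative):
  M.addModuleFlag(Module::Error, "wchar_size", 4);        // mismatch across modules is an error
  M.addModuleFlag(Module::Max, "PIC Level", 2);           // merged by taking the larger value
  M.addModuleFlag(Module::Override, "frame-pointer", 2);  // this value wins unconditionally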
bool IsNewDbgInfoFormat
Is this Module using intrinsics to record the position of debugging information, or non-intrinsic records?
Definition: Module.h:219
A tuple of MDNodes.
Definition: Metadata.h:1729
void setOperand(unsigned I, MDNode *New)
Definition: Metadata.cpp:1390
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1382
unsigned getNumOperands() const
Definition: Metadata.cpp:1378
A container for an operand bundle being viewed as a set of values rather than a set of uses.
Definition: InstrTypes.h:1447
ArrayRef< InputTy > inputs() const
Definition: InstrTypes.h:1462
StringRef getTag() const
Definition: InstrTypes.h:1470
Class to represent pointers.
Definition: DerivedTypes.h:646
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address space zero).
Definition: DerivedTypes.h:662
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition: Regex.cpp:83
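A hedged usage sketch of Regex::match; the pattern and Name are illustrative:
  Regex R("^vst([1234])\\.");
  SmallVector<StringRef, 2> Groups;
  if (R.match(Name, &Groups)) {
    // Groups[0] is the whole match; Groups[1] is the first capture group.
    unsigned NumVecs = 0;
    Groups[1].getAsInteger(10, NumVecs);
  }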
Class to represent scalable SIMD vectors.
Definition: DerivedTypes.h:586
uint64_t getMinNumElements() const
Get the minimum number of elements in this vector.
Definition: DerivedTypes.h:634
ArrayRef< int > getShuffleMask() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:91
void reserve(size_type N)
Definition: SmallVector.h:676
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:605
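These methods support the prefix-peeling idiom common in name-based upgrades. A minimal sketch, assumed to sit inside a bool-returning hook where Name is a function name:
  if (!Name.starts_with("llvm."))
    return false;              // not an intrinsic name
  Name = Name.drop_front(5);   // strip the "llvm." prefix
  if (Name.empty())
    return false;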
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
static constexpr size_t npos
Definition: StringRef.h:52
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & StartsWith(StringLiteral S, T Value)
Definition: StringSwitch.h:83
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
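A minimal usage sketch; the cases and values are illustrative:
  unsigned Kind = StringSwitch<unsigned>(Name)
                      .Case("abs", 0)
                      .Cases("min", "max", 1)
                      .StartsWith("cvt", 2)
                      .Default(~0U);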
Class to represent struct types.
Definition: DerivedTypes.h:216
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getElementType(unsigned N) const
Definition: DerivedTypes.h:342
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getBFloatTy(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:146
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:262
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:216
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
self_iterator getIterator()
Definition: ilist_node.h:109
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:690
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
AttributeMask typeIncompatible(Type *Ty, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Decode the IIT table entries for the specified intrinsic into an array of IITDescriptors.
Definition: Function.cpp:1305
std::optional< Function * > remangleIntrinsicFunction(Function *F)
Definition: Function.cpp:1766
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1023
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1461
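A hedged sketch of both forms (M is an assumed Module*):
  // Non-overloaded intrinsics need no type list.
  Function *Trap = Intrinsic::getDeclaration(M, Intrinsic::trap);
  // Overloaded intrinsics are keyed by the concrete types they are used at.
  Type *I64 = Type::getInt64Ty(M->getContext());
  Function *Ctlz = Intrinsic::getDeclaration(M, Intrinsic::ctlz, {I64});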
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call to the specified new function.
void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading, and returns true if it requires upgrading.
MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the function updated as well as all calls updated to the new function.
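A hedged sketch of the usual driver shape (M is an assumed Module; the early-increment range below tolerates the hook erasing the old function):
  for (Function &F : llvm::make_early_inc_range(M))
    UpgradeCallsToIntrinsic(&F);   // no-op for functions that need no upgrade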
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:656
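A minimal sketch of the early-increment idiom, assuming BB is a BasicBlock reference whose trivially dead instructions should be dropped:
  for (Instruction &I : llvm::make_early_inc_range(BB)) {
    // Erasing I here is safe: the iterator has already advanced past it.
    if (I.use_empty() && !I.isTerminator() && !I.mayHaveSideEffects())
      I.eraseFromParent();
  }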
bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
Op::Description Desc
void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address spaces: the constant expression is replaced by a pair of ptrtoint and inttoptr.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
unsigned getDebugMetadataVersionFromModule(const Module &M)
Return Debug Info Metadata Version by checking module flags.
Definition: DebugInfo.cpp:928
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
Definition: DebugInfo.cpp:594
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction is replaced by a pair of ptrtoint and inttoptr.
@ Dynamic
Denotes mode unknown at compile time.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C, pred), C.end());
Definition: STLExtras.h:2051
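A minimal usage sketch, assuming Vals is a SmallVector of Value pointers:
  llvm::erase_if(Vals, [](Value *V) { return V->use_empty(); });  // drop unused values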
bool UpgradeDebugInfo(Module &M)
Check the debug info version number; if it is out-dated, drop the debug info.
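A sketch approximating the policy this helper implements, under the assumption that broken out-dated debug info is stripped rather than kept; this is not the exact implementation:
  if (getDebugMetadataVersionFromModule(M) < DEBUG_METADATA_VERSION) {
    bool BrokenDebugInfo = false;
    (void)verifyModule(M, &errs(), &BrokenDebugInfo);
    if (BrokenDebugInfo)
      StripDebugInfo(M);   // drop the debug info instead of keeping invalid metadata
  }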
void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to the struct-path aware TBAA format.
void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker to new module flag format.
@ DEBUG_METADATA_VERSION
Definition: Metadata.h:52
bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
Definition: Verifier.cpp:7053
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117