//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include "llvm/TargetParser/Triple.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old; replace it with the new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
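
// Illustrative example (not part of the original file): with this upgrade, a
// stale declaration such as
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is renamed to "llvm.x86.sse41.ptestc.old" and redeclared with <2 x i64>
// operands; the calls themselves are rewritten later in UpgradeIntrinsicCall.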

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
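
// Illustrative example (not part of the original file): a declaration like
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// is renamed and redeclared with the immediate narrowed to i8:
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8)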

// Upgrade the declaration of FP compare intrinsics whose return type changed
// from a scalar to a vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
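
// Illustrative example (assumed operand shapes): the old scalar-mask form,
// e.g.
//   declare i16 @llvm.x86.avx512.mask.cmp.ps.512(...)
// is superseded by the vector-of-i1 form, e.g.
//   declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(...)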

// Upgrade the declaration of multiply-and-add-bytes intrinsics whose input
// argument types have changed from vectors of i32 to vectors of i8.
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are already vectors of i8.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
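
// Illustrative example (assumed operand shapes): at 512 bits, roughly
//   declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>)
// becomes
//   declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <64 x i8>, <64 x i8>)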

// Upgrade the declaration of AVX512 BF16 intrinsics whose return type changed
// from a vector of i16 to a vector of bfloat.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of AVX512 BF16 dot-product intrinsics whose operand
// types changed from vectors of i32 to vectors of bfloat.
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
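
// Illustrative example (assumed operand shapes): bf16 values used to be
// carried as i16, so roughly
//   declare <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float>)
// becomes
//   declare <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float>)
// and the dpbf16ps operands likewise move from i32 vectors to bfloat vectors.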

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}
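
// Illustrative usage (not part of the original file): for a module declaring,
// say, "llvm.x86.avx.storeu.ps.256", the caller strips the "x86." prefix and
// this predicate returns true; upgradeX86IntrinsicFunction then reports an
// upgrade with NewFn == nullptr, and the call itself is expanded to plain IR
// in UpgradeIntrinsicCall.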

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      // Added in 7.0
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MaskedFPCompare(F, ID, NewFn);
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    }
    return false; // No other 'x86.avx512.*'.
  }

  if (Name.consume_front("avx2.vpdpb")) {
    // Added in 21.1
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
             .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
             .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
             .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
             .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
             .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
             .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
             .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
             .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
             .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
             .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
             .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    return false; // No other 'x86.avx2.*'
  }

  if (Name.consume_front("avx10.vpdpb")) {
    // Added in 21.1
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
             .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
             .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
             .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
             .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
             .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    return false; // No other 'x86.avx10.*'
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic functions; return true if
// an upgrade is needed. IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases({"v2f32.v8i8", "v4f32.v16i8"},
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }
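
    // Illustrative example (assumed operand shapes) of the 12.0 change above:
    //   declare <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float>, <8 x i8>, <8 x i8>)
    // becomes
    //   declare <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float>, <4 x bfloat>, <4 x bfloat>)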

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to AArch64 Neon or Arm Neon.
  }
  // Continue on to Arm or AArch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }
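
      // Illustrative example (assumed): "llvm.aarch64.sve.ld2.nxv8i16" is
      // mapped onto the sret flavour "llvm.aarch64.sve.ld2.sret.nxv8i16", with
      // the overload type rebuilt from the return element type and the
      // predicate's element count.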

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    //     (and we upgrade it to use shared_cluster address-space[AS=7])
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    // The last three parameters of the older version of these
    // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    // The newer version reads as:
    // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    // So, when the type of the [N-3]rd argument is "not i1", then
    // it is the older version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}
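
// Illustrative example (assumed): an old G2S declaration whose destination is
// a plain shared pointer, e.g.
//   ... @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.1d(ptr addrspace(3) ..., ...)
// is flagged for upgrade to the shared::cluster form taking ptr addrspace(7),
// and/or to the variant with the trailing i32 cta_group flag described above.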

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}
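
// Illustrative usage (not part of the original file): for Name ==
// "shared.to.gen" this consumes "shared" and returns true, leaving Name ==
// ".to.gen" for the ptr.*.to.gen check below.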

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .StartsWith("partial.reduce.add",
                          Intrinsic::vector_partial_reduce_add)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert ||
            ID == Intrinsic::vector_partial_reduce_add)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
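    // Illustrative effect of the block above: e.g. a declaration of
    // "llvm.experimental.stepvector.nxv4i32" is renamed and redeclared as
    // "llvm.stepvector.nxv4i32".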
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'l':
    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      Intrinsic::ID IID = Name.starts_with("lifetime.start")
                              ? Intrinsic::lifetime_start
                              : Intrinsic::lifetime_end;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                F->getArg(0)->getType());
      return true;
    }
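    // Illustrative example (assumed operand shapes): the old two-operand form
    //   call void @llvm.lifetime.start.p0(i64 16, ptr %p)
    // is upgraded to the size-less form
    //   call void @llvm.lifetime.start.p0(ptr %p)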
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embed the alignment as an attribute on their
    // pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
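    // Illustrative example (assumed operand shapes): the old five-operand form
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 4, i1 false)
    // becomes the four-operand form with the alignment carried as attributes:
    //   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %d, ptr align 4 %s, i64 %n, i1 false)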
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }

    unsigned MaskedID =
        StringSwitch<unsigned>(Name)
            .StartsWith("masked.load", Intrinsic::masked_load)
            .StartsWith("masked.gather", Intrinsic::masked_gather)
            .StartsWith("masked.store", Intrinsic::masked_store)
            .StartsWith("masked.scatter", Intrinsic::masked_scatter)
            .Default(0);
    if (MaskedID && F->arg_size() == 4) {
      rename(F);
      if (MaskedID == Intrinsic::masked_load ||
          MaskedID == Intrinsic::masked_gather) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), MaskedID,
            {F->getReturnType(), F->getArg(0)->getType()});
        return true;
      }
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), MaskedID,
          {F->getArg(0)->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // Upgrade Distributed Shared Memory Intrinsics
      Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // Upgrade TMA copy G2S Intrinsics
      IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ii}
        Expand =
            Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
      else if (Name.consume_front("fabs."))
        // nvvm.fabs.{f,ftz.f,d}
        Expand = Name == "f" || Name == "ftz.f" || Name == "d";
      else if (Name.consume_front("ex2.approx."))
        // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
        Expand =
            Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{i,ii,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load."))
        // nvvm.atomic.load.add.{f32,f64}.p
        // nvvm.atomic.load.{inc,dec}.32.p
        Expand = StringSwitch<bool>(Name)
                     .StartsWith("add.f32.p", true)
                     .StartsWith("add.f64.p", true)
                     .StartsWith("inc.32.p", true)
                     .StartsWith("dec.32.p", true)
                     .Default(false);
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant,param}
        Expand = consumeNVVMPtrAddrSpace(Name);
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant,param}.to.gen
        Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = StringSwitch<bool>(Name)
                     .Case("barrier0", true)
                     .Case("barrier.n", true)
                     .Case("barrier.sync.cnt", true)
                     .Case("barrier.sync", true)
                     .Case("barrier", true)
                     .Case("bar.sync", true)
                     .Case("clz.ll", true)
                     .Case("popc.ll", true)
                     .Case("h2f", true)
                     .Case("swap.lo.hi.b64", true)
                     .Case("tanh.approx.f32", true)
                     .Default(false);

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
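      // For example (illustrative), "llvm.nvvm.abs.i" has no 1:1 intrinsic
      // replacement; UpgradeIntrinsicCall expands calls to it into plain IR
      // (roughly a compare/negate/select pattern).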
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;
1585
1586 case 'r': {
1587 if (Name.consume_front("riscv.")) {
1590 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1591 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1592 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1593 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1596 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1597 rename(F);
1598 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1599 return true;
1600 }
1601 break; // No other applicable upgrades.
1602 }
1603
1605 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1606 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1609 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1610 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1611 rename(F);
1612 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1613 return true;
1614 }
1615 break; // No other applicable upgrades.
1616 }
1617
1618 ID = StringSwitch<Intrinsic::ID>(Name)
1619 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1620 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1621 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1622 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1623 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1624 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1627 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1628 rename(F);
1629 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1630 return true;
1631 }
1632 break; // No other applicable upgrades.
1633 }
1634 break; // No other 'riscv.*' intrinsics
1635 }
1636 } break;
1637
1638 case 's':
1639 if (Name == "stackprotectorcheck") {
1640 NewFn = nullptr;
1641 return true;
1642 }
1643 break;
1644
1645 case 't':
1646 if (Name == "thread.pointer") {
1648 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1649 return true;
1650 }
1651 break;
1652
1653 case 'v': {
1654 if (Name == "var.annotation" && F->arg_size() == 4) {
1655 rename(F);
1656 NewFn = Intrinsic::getOrInsertDeclaration(
1657 F->getParent(), Intrinsic::var_annotation,
1658 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1659 return true;
1660 }
1661 break;
1662 }
1663
1664 case 'w':
1665 if (Name.consume_front("wasm.")) {
1668 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1669 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1670 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1673 rename(F);
1674 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1675 F->getReturnType());
1676 return true;
1677 }
1678
1679 if (Name.consume_front("dot.i8x16.i7x16.")) {
1681 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1682 .Case("add.signed",
1683 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1684 .Default(Intrinsic::not_intrinsic);
1685 if (ID != Intrinsic::not_intrinsic) {
1686 rename(F);
1687 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1688 return true;
1689 }
1690 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1691 }
1692 break; // No other 'wasm.*'.
1693 }
1694 break;
1695
1696 case 'x':
1697 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1698 return true;
1699 }
1700
1701 auto *ST = dyn_cast<StructType>(F->getReturnType());
1702 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1703 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1704 // Replace return type with literal non-packed struct. Only do this for
1705 // intrinsics declared to return a struct, not for intrinsics with
1706 // overloaded return type, in which case the exact struct type will be
1707 // mangled into the name.
1708 SmallVector<Intrinsic::IITDescriptor> Desc;
1709 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1710 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1711 auto *FT = F->getFunctionType();
1712 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1713 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1714 std::string Name = F->getName().str();
1715 rename(F);
1716 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1717 Name, F->getParent());
1718
1719 // The new function may also need remangling.
1720 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1721 NewFn = *Result;
1722 return true;
1723 }
1724 }
1725
1726 // Remangle our intrinsic since we upgrade the mangling
1727 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1728 if (Result != std::nullopt) {
1729 NewFn = *Result;
1730 return true;
1731 }
1732
1733 // This may not belong here. This function is effectively being overloaded
1734 // to both detect an intrinsic which needs upgrading, and to provide the
1735 // upgraded form of the intrinsic. We should perhaps have two separate
1736 // functions for this.
1737 return false;
1738}
1739
1740 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1741 bool CanUpgradeDebugIntrinsicsToRecords) {
1742 NewFn = nullptr;
1743 bool Upgraded =
1744 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1745
1746 // Upgrade intrinsic attributes. This does not change the function.
1747 if (NewFn)
1748 F = NewFn;
1749 if (Intrinsic::ID id = F->getIntrinsicID()) {
1750 // Only do this if the intrinsic signature is valid.
1751 SmallVector<Type *> OverloadTys;
1752 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1753 F->setAttributes(
1754 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1755 }
1756 return Upgraded;
1757}
1758
1759 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1760 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1761 GV->getName() == "llvm.global_dtors")) ||
1762 !GV->hasInitializer())
1763 return nullptr;
1764 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1765 if (!ATy)
1766 return nullptr;
1767 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1768 if (!STy || STy->getNumElements() != 2)
1769 return nullptr;
1770
1771 LLVMContext &C = GV->getContext();
1772 IRBuilder<> IRB(C);
1773 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1774 IRB.getPtrTy());
1775 Constant *Init = GV->getInitializer();
1776 unsigned N = Init->getNumOperands();
1777 std::vector<Constant *> NewCtors(N);
1778 for (unsigned i = 0; i != N; ++i) {
1779 auto Ctor = cast<Constant>(Init->getOperand(i));
1780 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1781 Ctor->getAggregateElement(1),
1782 Constant::getNullValue(IRB.getPtrTy()));
1783 }
1784 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1785
1786 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1787 NewInit, GV->getName());
1788}
1789
1790// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1791// to byte shuffles.
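// For example (an illustrative sketch, not lifted from a test): with a
// <2 x i64> operand and a byte shift of 2, the old call
//   %r = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a, i32 16)
// becomes a bitcast to <16 x i8>, a shufflevector against zeroinitializer
// that moves each byte two positions higher, and a bitcast back to <2 x i64>.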
1792 static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1793 unsigned Shift) {
1794 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1795 unsigned NumElts = ResultTy->getNumElements() * 8;
1796
1797 // Bitcast from a 64-bit element type to a byte element type.
1798 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1799 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1800
1801 // We'll be shuffling in zeroes.
1802 Value *Res = Constant::getNullValue(VecTy);
1803
1804 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1805 // we'll just return the zero vector.
1806 if (Shift < 16) {
1807 int Idxs[64];
1808 // 256/512-bit version is split into 2/4 16-byte lanes.
1809 for (unsigned l = 0; l != NumElts; l += 16)
1810 for (unsigned i = 0; i != 16; ++i) {
1811 unsigned Idx = NumElts + i - Shift;
1812 if (Idx < NumElts)
1813 Idx -= NumElts - 16; // end of lane, switch operand.
1814 Idxs[l + i] = Idx + l;
1815 }
1816
1817 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1818 }
1819
1820 // Bitcast back to a 64-bit element type.
1821 return Builder.CreateBitCast(Res, ResultTy, "cast");
1822}
1823
1824// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1825// to byte shuffles.
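// Mirrors upgradeX86PSLLDQIntrinsics above, except that bytes move toward
// element 0 within each 16-byte lane and zeroes shift in from the high end.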
1826 static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1827 unsigned Shift) {
1828 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1829 unsigned NumElts = ResultTy->getNumElements() * 8;
1830
1831 // Bitcast from a 64-bit element type to a byte element type.
1832 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1833 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1834
1835 // We'll be shuffling in zeroes.
1836 Value *Res = Constant::getNullValue(VecTy);
1837
1838 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1839 // we'll just return the zero vector.
1840 if (Shift < 16) {
1841 int Idxs[64];
1842 // 256/512-bit version is split into 2/4 16-byte lanes.
1843 for (unsigned l = 0; l != NumElts; l += 16)
1844 for (unsigned i = 0; i != 16; ++i) {
1845 unsigned Idx = i + Shift;
1846 if (Idx >= 16)
1847 Idx += NumElts - 16; // end of lane, switch operand.
1848 Idxs[l + i] = Idx + l;
1849 }
1850
1851 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1852 }
1853
1854 // Bitcast back to a 64-bit element type.
1855 return Builder.CreateBitCast(Res, ResultTy, "cast");
1856}
1857
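// Convert an integer bitmask (the i8/i16/... mask operand of an AVX512
// intrinsic) into a vector of i1 suitable for a select. A rough sketch of
// the IR this produces for an i8 mask guarding 4 elements (illustrative):
//   %bc = bitcast i8 %mask to <8 x i1>
//   %lo = shufflevector <8 x i1> %bc, <8 x i1> %bc,
//                       <4 x i32> <i32 0, i32 1, i32 2, i32 3>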
1858static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1859 unsigned NumElts) {
1860 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1861 llvm::VectorType *MaskTy = llvm::FixedVectorType::get(
1862 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1863 Mask = Builder.CreateBitCast(Mask, MaskTy);
1864
1865 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1866 // i8 and we need to extract down to the right number of elements.
1867 if (NumElts <= 4) {
1868 int Indices[4];
1869 for (unsigned i = 0; i != NumElts; ++i)
1870 Indices[i] = i;
1871 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1872 "extract");
1873 }
1874
1875 return Mask;
1876}
1877
1878static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1879 Value *Op1) {
1880 // If the mask is all ones just emit the first operation.
1881 if (const auto *C = dyn_cast<Constant>(Mask))
1882 if (C->isAllOnesValue())
1883 return Op0;
1884
1885 Mask = getX86MaskVec(Builder, Mask,
1886 cast<FixedVectorType>(Op0->getType())->getNumElements());
1887 return Builder.CreateSelect(Mask, Op0, Op1);
1888}
1889
1890static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1891 Value *Op1) {
1892 // If the mask is all ones just emit the first operation.
1893 if (const auto *C = dyn_cast<Constant>(Mask))
1894 if (C->isAllOnesValue())
1895 return Op0;
1896
1897 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1898 Mask->getType()->getIntegerBitWidth());
1899 Mask = Builder.CreateBitCast(Mask, MaskTy);
1900 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1901 return Builder.CreateSelect(Mask, Op0, Op1);
1902}
1903
1904// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1905// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1906// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
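// E.g. (illustrative): for a single 16-byte PALIGNR lane with ShiftVal == 4,
// result element i reads byte (4 + i) of the concatenation Op1:Op0, so
// Indices[] holds 4..19 and the emitted shufflevector spans both sources.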
1907 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1908 Value *Op1, Value *Shift,
1909 Value *Passthru, Value *Mask,
1910 bool IsVALIGN) {
1911 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1912
1913 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1914 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1915 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1916 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1917
1918 // Mask the immediate for VALIGN.
1919 if (IsVALIGN)
1920 ShiftVal &= (NumElts - 1);
1921
1922 // If palignr is shifting the pair of vectors more than the size of two
1923 // lanes, emit zero.
1924 if (ShiftVal >= 32)
1925 return llvm::Constant::getNullValue(Op0->getType());
1926
1927 // If palignr is shifting the pair of input vectors more than one lane,
1928 // but less than two lanes, convert to shifting in zeroes.
1929 if (ShiftVal > 16) {
1930 ShiftVal -= 16;
1931 Op1 = Op0;
1932 Op0 = llvm::Constant::getNullValue(Op0->getType());
1933 }
1934
1935 int Indices[64];
1936 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1937 for (unsigned l = 0; l < NumElts; l += 16) {
1938 for (unsigned i = 0; i != 16; ++i) {
1939 unsigned Idx = ShiftVal + i;
1940 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1941 Idx += NumElts - 16; // End of lane, switch operand.
1942 Indices[l + i] = Idx + l;
1943 }
1944 }
1945
1946 Value *Align = Builder.CreateShuffleVector(
1947 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1948
1949 return emitX86Select(Builder, Mask, Align, Passthru);
1950}
1951
1952 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1953 bool ZeroMask, bool IndexForm) {
1954 Type *Ty = CI.getType();
1955 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1956 unsigned EltWidth = Ty->getScalarSizeInBits();
1957 bool IsFloat = Ty->isFPOrFPVectorTy();
1958 Intrinsic::ID IID;
1959 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1960 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1961 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1962 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1963 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1964 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1965 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1966 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1967 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1968 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1969 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1970 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1971 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1972 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1973 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1974 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1975 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1976 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1977 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1978 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1979 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1980 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1981 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1982 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1983 else if (VecWidth == 128 && EltWidth == 16)
1984 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1985 else if (VecWidth == 256 && EltWidth == 16)
1986 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1987 else if (VecWidth == 512 && EltWidth == 16)
1988 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1989 else if (VecWidth == 128 && EltWidth == 8)
1990 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1991 else if (VecWidth == 256 && EltWidth == 8)
1992 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1993 else if (VecWidth == 512 && EltWidth == 8)
1994 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1995 else
1996 llvm_unreachable("Unexpected intrinsic");
1997
1998 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1999 CI.getArgOperand(2) };
2000
2001 // If this isn't index form we need to swap operands 0 and 1.
2002 if (!IndexForm)
2003 std::swap(Args[0], Args[1]);
2004
2005 Value *V = Builder.CreateIntrinsic(IID, Args);
2006 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2007 : Builder.CreateBitCast(CI.getArgOperand(1),
2008 Ty);
2009 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2010}
2011
2012 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2013 Intrinsic::ID IID) {
2014 Type *Ty = CI.getType();
2015 Value *Op0 = CI.getOperand(0);
2016 Value *Op1 = CI.getOperand(1);
2017 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2018
2019 if (CI.arg_size() == 4) { // For masked intrinsics.
2020 Value *VecSrc = CI.getOperand(2);
2021 Value *Mask = CI.getOperand(3);
2022 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2023 }
2024 return Res;
2025}
2026
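// Upgrade x86 rotate intrinsics to the generic funnel-shift intrinsics;
// roughly, a left rotate rotl(%x, %amt) becomes llvm.fshl(%x, %x, %amt)
// (llvm.fshr for right rotates), with scalar amounts splatted and masked
// forms finished off by emitX86Select below.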
2027 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2028 bool IsRotateRight) {
2029 Type *Ty = CI.getType();
2030 Value *Src = CI.getArgOperand(0);
2031 Value *Amt = CI.getArgOperand(1);
2032
2033 // Amount may be scalar immediate, in which case create a splat vector.
2034 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2035 // we only care about the lowest log2 bits anyway.
2036 if (Amt->getType() != Ty) {
2037 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2038 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2039 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2040 }
2041
2042 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2043 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2044
2045 if (CI.arg_size() == 4) { // For masked intrinsics.
2046 Value *VecSrc = CI.getOperand(2);
2047 Value *Mask = CI.getOperand(3);
2048 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2049 }
2050 return Res;
2051}
2052
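// Upgrade XOP vpcom/vpcomu intrinsics. The immediate encodes an integer
// predicate (0=lt, 1=le, 2=gt, 3=ge, 4=eq, 5=ne, 6=false, 7=true); the
// upgrade emits an icmp and sign-extends the i1 result back to the vector
// element width.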
2053static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2054 bool IsSigned) {
2055 Type *Ty = CI.getType();
2056 Value *LHS = CI.getArgOperand(0);
2057 Value *RHS = CI.getArgOperand(1);
2058
2059 CmpInst::Predicate Pred;
2060 switch (Imm) {
2061 case 0x0:
2062 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2063 break;
2064 case 0x1:
2065 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2066 break;
2067 case 0x2:
2068 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2069 break;
2070 case 0x3:
2071 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2072 break;
2073 case 0x4:
2074 Pred = ICmpInst::ICMP_EQ;
2075 break;
2076 case 0x5:
2077 Pred = ICmpInst::ICMP_NE;
2078 break;
2079 case 0x6:
2080 return Constant::getNullValue(Ty); // FALSE
2081 case 0x7:
2082 return Constant::getAllOnesValue(Ty); // TRUE
2083 default:
2084 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2085 }
2086
2087 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2088 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2089 return Ext;
2090}
2091
2092 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2093 bool IsShiftRight, bool ZeroMask) {
2094 Type *Ty = CI.getType();
2095 Value *Op0 = CI.getArgOperand(0);
2096 Value *Op1 = CI.getArgOperand(1);
2097 Value *Amt = CI.getArgOperand(2);
2098
2099 if (IsShiftRight)
2100 std::swap(Op0, Op1);
2101
2102 // Amount may be scalar immediate, in which case create a splat vector.
2103 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2104 // we only care about the lowest log2 bits anyway.
2105 if (Amt->getType() != Ty) {
2106 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2107 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2108 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2109 }
2110
2111 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2112 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2113
2114 unsigned NumArgs = CI.arg_size();
2115 if (NumArgs >= 4) { // For masked intrinsics.
2116 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2117 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2118 CI.getArgOperand(0);
2119 Value *Mask = CI.getOperand(NumArgs - 1);
2120 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2121 }
2122 return Res;
2123}
2124
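// Upgrade a masked store to the generic llvm.masked.store intrinsic, e.g.
// (a rough sketch for a non-constant mask and an aligned <4 x i32> store):
//   call void @llvm.masked.store.v4i32.p0(<4 x i32> %data, ptr %p,
//                                         i32 16, <4 x i1> %maskvec)
// An all-ones constant mask is emitted as a plain aligned store instead.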
2125 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2126 Value *Mask, bool Aligned) {
2127 const Align Alignment =
2128 Aligned
2129 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2130 : Align(1);
2131
2132 // If the mask is all ones just emit a regular store.
2133 if (const auto *C = dyn_cast<Constant>(Mask))
2134 if (C->isAllOnesValue())
2135 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2136
2137 // Convert the mask from an integer type to a vector of i1.
2138 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2139 Mask = getX86MaskVec(Builder, Mask, NumElts);
2140 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2141}
2142
2143 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2144 Value *Passthru, Value *Mask, bool Aligned) {
2145 Type *ValTy = Passthru->getType();
2146 const Align Alignment =
2147 Aligned
2148 ? Align(
2149 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
2150 8)
2151 : Align(1);
2152
2153 // If the mask is all ones just emit a regular load.
2154 if (const auto *C = dyn_cast<Constant>(Mask))
2155 if (C->isAllOnesValue())
2156 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2157
2158 // Convert the mask from an integer type to a vector of i1.
2159 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2160 Mask = getX86MaskVec(Builder, Mask, NumElts);
2161 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2162}
2163
2164static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2165 Type *Ty = CI.getType();
2166 Value *Op0 = CI.getArgOperand(0);
2167 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2168 {Op0, Builder.getInt1(false)});
2169 if (CI.arg_size() == 3)
2170 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2171 return Res;
2172}
2173
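// Upgrade PMULDQ/PMULUDQ intrinsics to plain IR: both multiply the low 32
// bits of each 64-bit element, so the inputs are reinterpreted as vXi64 and
// the upper halves are either cleared (unsigned, via 'and') or sign-extended
// (signed, via shl+ashr by 32) before an ordinary mul.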
2174static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2175 Type *Ty = CI.getType();
2176
2177 // Arguments have a vXi32 type so cast to vXi64.
2178 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2179 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2180
2181 if (IsSigned) {
2182 // Shift left then arithmetic shift right.
2183 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2184 LHS = Builder.CreateShl(LHS, ShiftAmt);
2185 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2186 RHS = Builder.CreateShl(RHS, ShiftAmt);
2187 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2188 } else {
2189 // Clear the upper bits.
2190 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2191 LHS = Builder.CreateAnd(LHS, Mask);
2192 RHS = Builder.CreateAnd(RHS, Mask);
2193 }
2194
2195 Value *Res = Builder.CreateMul(LHS, RHS);
2196
2197 if (CI.arg_size() == 4)
2198 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2199
2200 return Res;
2201}
2202
2203 // Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
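// E.g. (illustrative): a <4 x i1> compare result is first widened to
// <8 x i1> by shuffling in elements from a zero vector, then bitcast to i8,
// since the smallest x86 mask register form is 8 bits.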
2204 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2205 Value *Mask) {
2206 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2207 if (Mask) {
2208 const auto *C = dyn_cast<Constant>(Mask);
2209 if (!C || !C->isAllOnesValue())
2210 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2211 }
2212
2213 if (NumElts < 8) {
2214 int Indices[8];
2215 for (unsigned i = 0; i != NumElts; ++i)
2216 Indices[i] = i;
2217 for (unsigned i = NumElts; i != 8; ++i)
2218 Indices[i] = NumElts + i % NumElts;
2219 Vec = Builder.CreateShuffleVector(Vec,
2220 Constant::getNullValue(Vec->getType()),
2221 Indices);
2222 }
2223 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2224}
2225
2226 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2227 unsigned CC, bool Signed) {
2228 Value *Op0 = CI.getArgOperand(0);
2229 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2230
2231 Value *Cmp;
2232 if (CC == 3) {
2233 Cmp = Constant::getNullValue(
2234 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2235 } else if (CC == 7) {
2236 Cmp = Constant::getAllOnesValue(
2237 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2238 } else {
2239 ICmpInst::Predicate Pred;
2240 switch (CC) {
2241 default: llvm_unreachable("Unknown condition code");
2242 case 0: Pred = ICmpInst::ICMP_EQ; break;
2243 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2244 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2245 case 4: Pred = ICmpInst::ICMP_NE; break;
2246 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2247 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2248 }
2249 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2250 }
2251
2252 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2253
2254 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2255}
2256
2257// Replace a masked intrinsic with an older unmasked intrinsic.
2258 static Value *upgradeMaskedShift(IRBuilder<> &Builder, CallBase &CI,
2259 Intrinsic::ID IID) {
2260 Value *Rep =
2261 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2262 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2263}
2264
2265 static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2266 Value* A = CI.getArgOperand(0);
2267 Value* B = CI.getArgOperand(1);
2268 Value* Src = CI.getArgOperand(2);
2269 Value* Mask = CI.getArgOperand(3);
2270
2271 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2272 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2273 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2274 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2275 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2276 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2277}
2278
2279 static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2280 Value* Op = CI.getArgOperand(0);
2281 Type* ReturnOp = CI.getType();
2282 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2283 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2284 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2285}
2286
2287// Replace intrinsic with unmasked version and a select.
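// E.g. (a rough sketch): @llvm.x86.avx512.mask.max.ps.128(%a, %b, %src, %k)
// becomes @llvm.x86.sse.max.ps(%a, %b) followed by a select between the
// result and %src under the vector-of-i1 form of %k.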
2288 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2289 CallBase &CI, Value *&Rep) {
2290 Name = Name.substr(12); // Remove avx512.mask.
2291
2292 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2293 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2294 Intrinsic::ID IID;
2295 if (Name.starts_with("max.p")) {
2296 if (VecWidth == 128 && EltWidth == 32)
2297 IID = Intrinsic::x86_sse_max_ps;
2298 else if (VecWidth == 128 && EltWidth == 64)
2299 IID = Intrinsic::x86_sse2_max_pd;
2300 else if (VecWidth == 256 && EltWidth == 32)
2301 IID = Intrinsic::x86_avx_max_ps_256;
2302 else if (VecWidth == 256 && EltWidth == 64)
2303 IID = Intrinsic::x86_avx_max_pd_256;
2304 else
2305 llvm_unreachable("Unexpected intrinsic");
2306 } else if (Name.starts_with("min.p")) {
2307 if (VecWidth == 128 && EltWidth == 32)
2308 IID = Intrinsic::x86_sse_min_ps;
2309 else if (VecWidth == 128 && EltWidth == 64)
2310 IID = Intrinsic::x86_sse2_min_pd;
2311 else if (VecWidth == 256 && EltWidth == 32)
2312 IID = Intrinsic::x86_avx_min_ps_256;
2313 else if (VecWidth == 256 && EltWidth == 64)
2314 IID = Intrinsic::x86_avx_min_pd_256;
2315 else
2316 llvm_unreachable("Unexpected intrinsic");
2317 } else if (Name.starts_with("pshuf.b.")) {
2318 if (VecWidth == 128)
2319 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2320 else if (VecWidth == 256)
2321 IID = Intrinsic::x86_avx2_pshuf_b;
2322 else if (VecWidth == 512)
2323 IID = Intrinsic::x86_avx512_pshuf_b_512;
2324 else
2325 llvm_unreachable("Unexpected intrinsic");
2326 } else if (Name.starts_with("pmul.hr.sw.")) {
2327 if (VecWidth == 128)
2328 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2329 else if (VecWidth == 256)
2330 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2331 else if (VecWidth == 512)
2332 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2333 else
2334 llvm_unreachable("Unexpected intrinsic");
2335 } else if (Name.starts_with("pmulh.w.")) {
2336 if (VecWidth == 128)
2337 IID = Intrinsic::x86_sse2_pmulh_w;
2338 else if (VecWidth == 256)
2339 IID = Intrinsic::x86_avx2_pmulh_w;
2340 else if (VecWidth == 512)
2341 IID = Intrinsic::x86_avx512_pmulh_w_512;
2342 else
2343 llvm_unreachable("Unexpected intrinsic");
2344 } else if (Name.starts_with("pmulhu.w.")) {
2345 if (VecWidth == 128)
2346 IID = Intrinsic::x86_sse2_pmulhu_w;
2347 else if (VecWidth == 256)
2348 IID = Intrinsic::x86_avx2_pmulhu_w;
2349 else if (VecWidth == 512)
2350 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2351 else
2352 llvm_unreachable("Unexpected intrinsic");
2353 } else if (Name.starts_with("pmaddw.d.")) {
2354 if (VecWidth == 128)
2355 IID = Intrinsic::x86_sse2_pmadd_wd;
2356 else if (VecWidth == 256)
2357 IID = Intrinsic::x86_avx2_pmadd_wd;
2358 else if (VecWidth == 512)
2359 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2360 else
2361 llvm_unreachable("Unexpected intrinsic");
2362 } else if (Name.starts_with("pmaddubs.w.")) {
2363 if (VecWidth == 128)
2364 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2365 else if (VecWidth == 256)
2366 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2367 else if (VecWidth == 512)
2368 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2369 else
2370 llvm_unreachable("Unexpected intrinsic");
2371 } else if (Name.starts_with("packsswb.")) {
2372 if (VecWidth == 128)
2373 IID = Intrinsic::x86_sse2_packsswb_128;
2374 else if (VecWidth == 256)
2375 IID = Intrinsic::x86_avx2_packsswb;
2376 else if (VecWidth == 512)
2377 IID = Intrinsic::x86_avx512_packsswb_512;
2378 else
2379 llvm_unreachable("Unexpected intrinsic");
2380 } else if (Name.starts_with("packssdw.")) {
2381 if (VecWidth == 128)
2382 IID = Intrinsic::x86_sse2_packssdw_128;
2383 else if (VecWidth == 256)
2384 IID = Intrinsic::x86_avx2_packssdw;
2385 else if (VecWidth == 512)
2386 IID = Intrinsic::x86_avx512_packssdw_512;
2387 else
2388 llvm_unreachable("Unexpected intrinsic");
2389 } else if (Name.starts_with("packuswb.")) {
2390 if (VecWidth == 128)
2391 IID = Intrinsic::x86_sse2_packuswb_128;
2392 else if (VecWidth == 256)
2393 IID = Intrinsic::x86_avx2_packuswb;
2394 else if (VecWidth == 512)
2395 IID = Intrinsic::x86_avx512_packuswb_512;
2396 else
2397 llvm_unreachable("Unexpected intrinsic");
2398 } else if (Name.starts_with("packusdw.")) {
2399 if (VecWidth == 128)
2400 IID = Intrinsic::x86_sse41_packusdw;
2401 else if (VecWidth == 256)
2402 IID = Intrinsic::x86_avx2_packusdw;
2403 else if (VecWidth == 512)
2404 IID = Intrinsic::x86_avx512_packusdw_512;
2405 else
2406 llvm_unreachable("Unexpected intrinsic");
2407 } else if (Name.starts_with("vpermilvar.")) {
2408 if (VecWidth == 128 && EltWidth == 32)
2409 IID = Intrinsic::x86_avx_vpermilvar_ps;
2410 else if (VecWidth == 128 && EltWidth == 64)
2411 IID = Intrinsic::x86_avx_vpermilvar_pd;
2412 else if (VecWidth == 256 && EltWidth == 32)
2413 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2414 else if (VecWidth == 256 && EltWidth == 64)
2415 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2416 else if (VecWidth == 512 && EltWidth == 32)
2417 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2418 else if (VecWidth == 512 && EltWidth == 64)
2419 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2420 else
2421 llvm_unreachable("Unexpected intrinsic");
2422 } else if (Name == "cvtpd2dq.256") {
2423 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2424 } else if (Name == "cvtpd2ps.256") {
2425 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2426 } else if (Name == "cvttpd2dq.256") {
2427 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2428 } else if (Name == "cvttps2dq.128") {
2429 IID = Intrinsic::x86_sse2_cvttps2dq;
2430 } else if (Name == "cvttps2dq.256") {
2431 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2432 } else if (Name.starts_with("permvar.")) {
2433 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2434 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2435 IID = Intrinsic::x86_avx2_permps;
2436 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2437 IID = Intrinsic::x86_avx2_permd;
2438 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2439 IID = Intrinsic::x86_avx512_permvar_df_256;
2440 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2441 IID = Intrinsic::x86_avx512_permvar_di_256;
2442 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2443 IID = Intrinsic::x86_avx512_permvar_sf_512;
2444 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2445 IID = Intrinsic::x86_avx512_permvar_si_512;
2446 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2447 IID = Intrinsic::x86_avx512_permvar_df_512;
2448 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2449 IID = Intrinsic::x86_avx512_permvar_di_512;
2450 else if (VecWidth == 128 && EltWidth == 16)
2451 IID = Intrinsic::x86_avx512_permvar_hi_128;
2452 else if (VecWidth == 256 && EltWidth == 16)
2453 IID = Intrinsic::x86_avx512_permvar_hi_256;
2454 else if (VecWidth == 512 && EltWidth == 16)
2455 IID = Intrinsic::x86_avx512_permvar_hi_512;
2456 else if (VecWidth == 128 && EltWidth == 8)
2457 IID = Intrinsic::x86_avx512_permvar_qi_128;
2458 else if (VecWidth == 256 && EltWidth == 8)
2459 IID = Intrinsic::x86_avx512_permvar_qi_256;
2460 else if (VecWidth == 512 && EltWidth == 8)
2461 IID = Intrinsic::x86_avx512_permvar_qi_512;
2462 else
2463 llvm_unreachable("Unexpected intrinsic");
2464 } else if (Name.starts_with("dbpsadbw.")) {
2465 if (VecWidth == 128)
2466 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2467 else if (VecWidth == 256)
2468 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2469 else if (VecWidth == 512)
2470 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2471 else
2472 llvm_unreachable("Unexpected intrinsic");
2473 } else if (Name.starts_with("pmultishift.qb.")) {
2474 if (VecWidth == 128)
2475 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2476 else if (VecWidth == 256)
2477 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2478 else if (VecWidth == 512)
2479 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2480 else
2481 llvm_unreachable("Unexpected intrinsic");
2482 } else if (Name.starts_with("conflict.")) {
2483 if (Name[9] == 'd' && VecWidth == 128)
2484 IID = Intrinsic::x86_avx512_conflict_d_128;
2485 else if (Name[9] == 'd' && VecWidth == 256)
2486 IID = Intrinsic::x86_avx512_conflict_d_256;
2487 else if (Name[9] == 'd' && VecWidth == 512)
2488 IID = Intrinsic::x86_avx512_conflict_d_512;
2489 else if (Name[9] == 'q' && VecWidth == 128)
2490 IID = Intrinsic::x86_avx512_conflict_q_128;
2491 else if (Name[9] == 'q' && VecWidth == 256)
2492 IID = Intrinsic::x86_avx512_conflict_q_256;
2493 else if (Name[9] == 'q' && VecWidth == 512)
2494 IID = Intrinsic::x86_avx512_conflict_q_512;
2495 else
2496 llvm_unreachable("Unexpected intrinsic");
2497 } else if (Name.starts_with("pavg.")) {
2498 if (Name[5] == 'b' && VecWidth == 128)
2499 IID = Intrinsic::x86_sse2_pavg_b;
2500 else if (Name[5] == 'b' && VecWidth == 256)
2501 IID = Intrinsic::x86_avx2_pavg_b;
2502 else if (Name[5] == 'b' && VecWidth == 512)
2503 IID = Intrinsic::x86_avx512_pavg_b_512;
2504 else if (Name[5] == 'w' && VecWidth == 128)
2505 IID = Intrinsic::x86_sse2_pavg_w;
2506 else if (Name[5] == 'w' && VecWidth == 256)
2507 IID = Intrinsic::x86_avx2_pavg_w;
2508 else if (Name[5] == 'w' && VecWidth == 512)
2509 IID = Intrinsic::x86_avx512_pavg_w_512;
2510 else
2511 llvm_unreachable("Unexpected intrinsic");
2512 } else
2513 return false;
2514
2515 SmallVector<Value *, 4> Args(CI.args());
2516 Args.pop_back();
2517 Args.pop_back();
2518 Rep = Builder.CreateIntrinsic(IID, Args);
2519 unsigned NumArgs = CI.arg_size();
2520 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2521 CI.getArgOperand(NumArgs - 2));
2522 return true;
2523}
2524
2525/// Upgrade comment in call to inline asm that represents an objc retain release
2526/// marker.
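/// E.g. (illustrative): in a string such as
/// "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
/// the "#" introducing the marker comment is rewritten to ";".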
2527void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2528 size_t Pos;
2529 if (AsmStr->find("mov\tfp") == 0 &&
2530 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2531 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2532 AsmStr->replace(Pos, 1, ";");
2533 }
2534}
2535
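// Upgrade a call to a legacy nvvm.* intrinsic to generic IR; roughly,
// nvvm.rotate.b32(%x, %n) becomes llvm.fshl.i32(%x, %x, %n) and
// nvvm.atomic.load.add.f32.p0(%p, %v) becomes an "atomicrmw fadd" on %p.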
2536 static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2537 Function *F, IRBuilder<> &Builder) {
2538 Value *Rep = nullptr;
2539
2540 if (Name == "abs.i" || Name == "abs.ll") {
2541 Value *Arg = CI->getArgOperand(0);
2542 Value *Neg = Builder.CreateNeg(Arg, "neg");
2543 Value *Cmp = Builder.CreateICmpSGE(
2544 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2545 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2546 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2547 Type *Ty = (Name == "abs.bf16")
2548 ? Builder.getBFloatTy()
2549 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2550 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2551 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2552 Rep = Builder.CreateBitCast(Abs, CI->getType());
2553 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2554 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2555 : Intrinsic::nvvm_fabs;
2556 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2557 } else if (Name.consume_front("ex2.approx.")) {
2558 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2559 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2560 : Intrinsic::nvvm_ex2_approx;
2561 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2562 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2563 Name.starts_with("atomic.load.add.f64.p")) {
2564 Value *Ptr = CI->getArgOperand(0);
2565 Value *Val = CI->getArgOperand(1);
2566 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2567 AtomicOrdering::SequentiallyConsistent);
2568 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2569 Name.starts_with("atomic.load.dec.32.p")) {
2570 Value *Ptr = CI->getArgOperand(0);
2571 Value *Val = CI->getArgOperand(1);
2572 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2573 : AtomicRMWInst::UDecWrap;
2574 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2575 AtomicOrdering::SequentiallyConsistent);
2576 } else if (Name.consume_front("max.") &&
2577 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2578 Name == "ui" || Name == "ull")) {
2579 Value *Arg0 = CI->getArgOperand(0);
2580 Value *Arg1 = CI->getArgOperand(1);
2581 Value *Cmp = Name.starts_with("u")
2582 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2583 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2584 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2585 } else if (Name.consume_front("min.") &&
2586 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2587 Name == "ui" || Name == "ull")) {
2588 Value *Arg0 = CI->getArgOperand(0);
2589 Value *Arg1 = CI->getArgOperand(1);
2590 Value *Cmp = Name.starts_with("u")
2591 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2592 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2593 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2594 } else if (Name == "clz.ll") {
2595 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2596 Value *Arg = CI->getArgOperand(0);
2597 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2598 {Arg, Builder.getFalse()},
2599 /*FMFSource=*/nullptr, "ctlz");
2600 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2601 } else if (Name == "popc.ll") {
2602 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2603 // i64.
2604 Value *Arg = CI->getArgOperand(0);
2605 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2606 Arg, /*FMFSource=*/nullptr, "ctpop");
2607 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2608 } else if (Name == "h2f") {
2609 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2610 {Builder.getFloatTy()}, CI->getArgOperand(0),
2611 /*FMFSource=*/nullptr, "h2f");
2612 } else if (Name.consume_front("bitcast.") &&
2613 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2614 Name == "d2ll")) {
2615 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2616 } else if (Name == "rotate.b32") {
2617 Value *Arg = CI->getOperand(0);
2618 Value *ShiftAmt = CI->getOperand(1);
2619 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2620 {Arg, Arg, ShiftAmt});
2621 } else if (Name == "rotate.b64") {
2622 Type *Int64Ty = Builder.getInt64Ty();
2623 Value *Arg = CI->getOperand(0);
2624 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2625 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2626 {Arg, Arg, ZExtShiftAmt});
2627 } else if (Name == "rotate.right.b64") {
2628 Type *Int64Ty = Builder.getInt64Ty();
2629 Value *Arg = CI->getOperand(0);
2630 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2631 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2632 {Arg, Arg, ZExtShiftAmt});
2633 } else if (Name == "swap.lo.hi.b64") {
2634 Type *Int64Ty = Builder.getInt64Ty();
2635 Value *Arg = CI->getOperand(0);
2636 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2637 {Arg, Arg, Builder.getInt64(32)});
2638 } else if ((Name.consume_front("ptr.gen.to.") &&
2639 consumeNVVMPtrAddrSpace(Name)) ||
2640 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2641 Name.starts_with(".to.gen"))) {
2642 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2643 } else if (Name.consume_front("ldg.global")) {
2644 Value *Ptr = CI->getArgOperand(0);
2645 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2646 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2647 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2648 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2649 MDNode *MD = MDNode::get(Builder.getContext(), {});
2650 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2651 return LD;
2652 } else if (Name == "tanh.approx.f32") {
2653 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2654 FastMathFlags FMF;
2655 FMF.setApproxFunc();
2656 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2657 FMF);
2658 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2659 Value *Arg =
2660 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2661 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2662 {}, {Arg});
2663 } else if (Name == "barrier") {
2664 Rep = Builder.CreateIntrinsic(
2665 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2666 {CI->getArgOperand(0), CI->getArgOperand(1)});
2667 } else if (Name == "barrier.sync") {
2668 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2669 {CI->getArgOperand(0)});
2670 } else if (Name == "barrier.sync.cnt") {
2671 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2672 {CI->getArgOperand(0), CI->getArgOperand(1)});
2673 } else {
2674 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2675 if (IID != Intrinsic::not_intrinsic &&
2676 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2677 rename(F);
2678 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2679 SmallVector<Value *, 2> Args;
2680 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2681 Value *Arg = CI->getArgOperand(I);
2682 Type *OldType = Arg->getType();
2683 Type *NewType = NewFn->getArg(I)->getType();
2684 Args.push_back(
2685 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2686 ? Builder.CreateBitCast(Arg, NewType)
2687 : Arg);
2688 }
2689 Rep = Builder.CreateCall(NewFn, Args);
2690 if (F->getReturnType()->isIntegerTy())
2691 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2692 }
2693 }
2694
2695 return Rep;
2696}
2697
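// Upgrade a call to a legacy x86 intrinsic for which no direct replacement
// declaration exists; each case below rebuilds the operation from generic IR
// or a current intrinsic.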
2698 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2699 IRBuilder<> &Builder) {
2700 LLVMContext &C = F->getContext();
2701 Value *Rep = nullptr;
2702
2703 if (Name.starts_with("sse4a.movnt.")) {
2705 Elts.push_back(
2706 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2707 MDNode *Node = MDNode::get(C, Elts);
2708
2709 Value *Arg0 = CI->getArgOperand(0);
2710 Value *Arg1 = CI->getArgOperand(1);
2711
2712 // Nontemporal (unaligned) store of the 0'th element of the float/double
2713 // vector.
2714 Value *Extract =
2715 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2716
2717 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2718 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2719 } else if (Name.starts_with("avx.movnt.") ||
2720 Name.starts_with("avx512.storent.")) {
2722 Elts.push_back(
2723 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2724 MDNode *Node = MDNode::get(C, Elts);
2725
2726 Value *Arg0 = CI->getArgOperand(0);
2727 Value *Arg1 = CI->getArgOperand(1);
2728
2729 StoreInst *SI = Builder.CreateAlignedStore(
2730 Arg1, Arg0,
2731 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2732 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2733 } else if (Name == "sse2.storel.dq") {
2734 Value *Arg0 = CI->getArgOperand(0);
2735 Value *Arg1 = CI->getArgOperand(1);
2736
2737 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2738 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2739 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2740 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2741 } else if (Name.starts_with("sse.storeu.") ||
2742 Name.starts_with("sse2.storeu.") ||
2743 Name.starts_with("avx.storeu.")) {
2744 Value *Arg0 = CI->getArgOperand(0);
2745 Value *Arg1 = CI->getArgOperand(1);
2746 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2747 } else if (Name == "avx512.mask.store.ss") {
2748 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2749 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2750 Mask, false);
2751 } else if (Name.starts_with("avx512.mask.store")) {
2752 // "avx512.mask.storeu." or "avx512.mask.store."
2753 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2754 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2755 CI->getArgOperand(2), Aligned);
2756 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2757 // Upgrade packed integer vector compare intrinsics to compare instructions.
2758 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2759 bool CmpEq = Name[9] == 'e';
2760 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2761 CI->getArgOperand(0), CI->getArgOperand(1));
2762 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2763 } else if (Name.starts_with("avx512.broadcastm")) {
2764 Type *ExtTy = Type::getInt32Ty(C);
2765 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2766 ExtTy = Type::getInt64Ty(C);
2767 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2768 ExtTy->getPrimitiveSizeInBits();
2769 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2770 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2771 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2772 Value *Vec = CI->getArgOperand(0);
2773 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2774 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2775 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2776 } else if (Name.starts_with("avx.sqrt.p") ||
2777 Name.starts_with("sse2.sqrt.p") ||
2778 Name.starts_with("sse.sqrt.p")) {
2779 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2780 {CI->getArgOperand(0)});
2781 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2782 if (CI->arg_size() == 4 &&
2783 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2784 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2785 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2786 : Intrinsic::x86_avx512_sqrt_pd_512;
2787
2788 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2789 Rep = Builder.CreateIntrinsic(IID, Args);
2790 } else {
2791 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2792 {CI->getArgOperand(0)});
2793 }
2794 Rep =
2795 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2796 } else if (Name.starts_with("avx512.ptestm") ||
2797 Name.starts_with("avx512.ptestnm")) {
2798 Value *Op0 = CI->getArgOperand(0);
2799 Value *Op1 = CI->getArgOperand(1);
2800 Value *Mask = CI->getArgOperand(2);
2801 Rep = Builder.CreateAnd(Op0, Op1);
2802 llvm::Type *Ty = Op0->getType();
2803 Value *Zero = llvm::Constant::getNullValue(Ty);
2804 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2805 ? ICmpInst::ICMP_NE
2806 : ICmpInst::ICMP_EQ;
2807 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2808 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2809 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2810 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2811 ->getNumElements();
2812 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2813 Rep =
2814 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2815 } else if (Name.starts_with("avx512.kunpck")) {
2816 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2817 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2818 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2819 int Indices[64];
2820 for (unsigned i = 0; i != NumElts; ++i)
2821 Indices[i] = i;
2822
2823 // First extract half of each vector. This gives better codegen than
2824 // doing it in a single shuffle.
2825 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2826 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2827 // Concat the vectors.
2828 // NOTE: Operands have to be swapped to match intrinsic definition.
2829 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2830 Rep = Builder.CreateBitCast(Rep, CI->getType());
2831 } else if (Name == "avx512.kand.w") {
2832 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2833 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2834 Rep = Builder.CreateAnd(LHS, RHS);
2835 Rep = Builder.CreateBitCast(Rep, CI->getType());
2836 } else if (Name == "avx512.kandn.w") {
2837 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2838 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2839 LHS = Builder.CreateNot(LHS);
2840 Rep = Builder.CreateAnd(LHS, RHS);
2841 Rep = Builder.CreateBitCast(Rep, CI->getType());
2842 } else if (Name == "avx512.kor.w") {
2843 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2844 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2845 Rep = Builder.CreateOr(LHS, RHS);
2846 Rep = Builder.CreateBitCast(Rep, CI->getType());
2847 } else if (Name == "avx512.kxor.w") {
2848 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2849 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2850 Rep = Builder.CreateXor(LHS, RHS);
2851 Rep = Builder.CreateBitCast(Rep, CI->getType());
2852 } else if (Name == "avx512.kxnor.w") {
2853 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2854 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2855 LHS = Builder.CreateNot(LHS);
2856 Rep = Builder.CreateXor(LHS, RHS);
2857 Rep = Builder.CreateBitCast(Rep, CI->getType());
2858 } else if (Name == "avx512.knot.w") {
2859 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2860 Rep = Builder.CreateNot(Rep);
2861 Rep = Builder.CreateBitCast(Rep, CI->getType());
2862 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2863 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2864 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2865 Rep = Builder.CreateOr(LHS, RHS);
2866 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2867 Value *C;
2868 if (Name[14] == 'c')
2869 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2870 else
2871 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2872 Rep = Builder.CreateICmpEQ(Rep, C);
2873 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2874 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2875 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2876 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2877 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2878 Type *I32Ty = Type::getInt32Ty(C);
2879 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2880 ConstantInt::get(I32Ty, 0));
2881 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2882 ConstantInt::get(I32Ty, 0));
2883 Value *EltOp;
2884 if (Name.contains(".add."))
2885 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2886 else if (Name.contains(".sub."))
2887 EltOp = Builder.CreateFSub(Elt0, Elt1);
2888 else if (Name.contains(".mul."))
2889 EltOp = Builder.CreateFMul(Elt0, Elt1);
2890 else
2891 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2892 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2893 ConstantInt::get(I32Ty, 0));
2894 } else if (Name.starts_with("avx512.mask.pcmp")) {
2895 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2896 bool CmpEq = Name[16] == 'e';
2897 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2898 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2899 Type *OpTy = CI->getArgOperand(0)->getType();
2900 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2901 Intrinsic::ID IID;
2902 switch (VecWidth) {
2903 default:
2904 llvm_unreachable("Unexpected intrinsic");
2905 case 128:
2906 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2907 break;
2908 case 256:
2909 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2910 break;
2911 case 512:
2912 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2913 break;
2914 }
2915
2916 Rep =
2917 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2918 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2919 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2920 Type *OpTy = CI->getArgOperand(0)->getType();
2921 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2922 unsigned EltWidth = OpTy->getScalarSizeInBits();
2923 Intrinsic::ID IID;
2924 if (VecWidth == 128 && EltWidth == 32)
2925 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2926 else if (VecWidth == 256 && EltWidth == 32)
2927 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2928 else if (VecWidth == 512 && EltWidth == 32)
2929 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2930 else if (VecWidth == 128 && EltWidth == 64)
2931 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2932 else if (VecWidth == 256 && EltWidth == 64)
2933 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2934 else if (VecWidth == 512 && EltWidth == 64)
2935 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2936 else
2937 llvm_unreachable("Unexpected intrinsic");
2938
2939 Rep =
2940 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2941 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2942 } else if (Name.starts_with("avx512.cmp.p")) {
2943 SmallVector<Value *, 4> Args(CI->args());
2944 Type *OpTy = Args[0]->getType();
2945 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2946 unsigned EltWidth = OpTy->getScalarSizeInBits();
2947 Intrinsic::ID IID;
2948 if (VecWidth == 128 && EltWidth == 32)
2949 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2950 else if (VecWidth == 256 && EltWidth == 32)
2951 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2952 else if (VecWidth == 512 && EltWidth == 32)
2953 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2954 else if (VecWidth == 128 && EltWidth == 64)
2955 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2956 else if (VecWidth == 256 && EltWidth == 64)
2957 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2958 else if (VecWidth == 512 && EltWidth == 64)
2959 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2960 else
2961 llvm_unreachable("Unexpected intrinsic");
2962
2963 Value *Mask = CI->getArgOperand(CI->arg_size() - 1);
2964 if (VecWidth == 512)
2965 std::swap(Mask, Args.back());
2966 Args.push_back(Mask);
2967
2968 Rep = Builder.CreateIntrinsic(IID, Args);
2969 } else if (Name.starts_with("avx512.mask.cmp.")) {
2970 // Integer compare intrinsics.
2971 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2972 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2973 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2974 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2975 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2976 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2977 Name.starts_with("avx512.cvtw2mask.") ||
2978 Name.starts_with("avx512.cvtd2mask.") ||
2979 Name.starts_with("avx512.cvtq2mask.")) {
2980 Value *Op = CI->getArgOperand(0);
2981 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2982 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2983 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2984 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2985 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2986 Name.starts_with("avx512.mask.pabs")) {
2987 Rep = upgradeAbs(Builder, *CI);
2988 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2989 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2990 Name.starts_with("avx512.mask.pmaxs")) {
2991 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2992 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2993 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2994 Name.starts_with("avx512.mask.pmaxu")) {
2995 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2996 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2997 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2998 Name.starts_with("avx512.mask.pmins")) {
2999 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3000 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3001 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3002 Name.starts_with("avx512.mask.pminu")) {
3003 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3004 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3005 Name == "avx512.pmulu.dq.512" ||
3006 Name.starts_with("avx512.mask.pmulu.dq.")) {
3007 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3008 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3009 Name == "avx512.pmul.dq.512" ||
3010 Name.starts_with("avx512.mask.pmul.dq.")) {
3011 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3012 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3013 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3014 Rep =
3015 Builder.CreateSIToFP(CI->getArgOperand(1),
3016 cast<VectorType>(CI->getType())->getElementType());
3017 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3018 } else if (Name == "avx512.cvtusi2sd") {
3019 Rep =
3020 Builder.CreateUIToFP(CI->getArgOperand(1),
3021 cast<VectorType>(CI->getType())->getElementType());
3022 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3023 } else if (Name == "sse2.cvtss2sd") {
3024 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3025 Rep = Builder.CreateFPExt(
3026 Rep, cast<VectorType>(CI->getType())->getElementType());
3027 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3028 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3029 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3030 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3031 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3032 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3033 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3034 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3035 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3036 Name == "avx512.mask.cvtqq2ps.256" ||
3037 Name == "avx512.mask.cvtqq2ps.512" ||
3038 Name == "avx512.mask.cvtuqq2ps.256" ||
3039 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3040 Name == "avx.cvt.ps2.pd.256" ||
3041 Name == "avx512.mask.cvtps2pd.128" ||
3042 Name == "avx512.mask.cvtps2pd.256") {
3043 auto *DstTy = cast<FixedVectorType>(CI->getType());
3044 Rep = CI->getArgOperand(0);
3045 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3046
3047 unsigned NumDstElts = DstTy->getNumElements();
3048 if (NumDstElts < SrcTy->getNumElements()) {
3049 assert(NumDstElts == 2 && "Unexpected vector size");
3050 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3051 }
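// e.g. cvtdq2pd reads a <4 x i32> operand but only converts the low two
// elements, so the shuffle above first narrows the source to <2 x i32>.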
3052
3053 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3054 bool IsUnsigned = Name.contains("cvtu");
3055 if (IsPS2PD)
3056 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3057 else if (CI->arg_size() == 4 &&
3058 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3059 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3060 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3061 : Intrinsic::x86_avx512_sitofp_round;
3062 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3063 {Rep, CI->getArgOperand(3)});
3064 } else {
3065 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3066 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3067 }
3068
3069 if (CI->arg_size() >= 3)
3070 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3071 CI->getArgOperand(1));
3072 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3073 Name.starts_with("vcvtph2ps.")) {
3074 auto *DstTy = cast<FixedVectorType>(CI->getType());
3075 Rep = CI->getArgOperand(0);
3076 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3077 unsigned NumDstElts = DstTy->getNumElements();
3078 if (NumDstElts != SrcTy->getNumElements()) {
3079 assert(NumDstElts == 4 && "Unexpected vector size");
3080 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3081 }
3082 Rep = Builder.CreateBitCast(
3083 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3084 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3085 if (CI->arg_size() >= 3)
3086 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3087 CI->getArgOperand(1));
3088 } else if (Name.starts_with("avx512.mask.load")) {
3089 // "avx512.mask.loadu." or "avx512.mask.load."
3090 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
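// upgradeMaskedLoad rebuilds this as a @llvm.masked.load with operand 1 as
// the pass-through value; the unaligned ('u') forms use align 1 and the
// aligned forms use the vector's natural alignment.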
3091 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3092 CI->getArgOperand(2), Aligned);
3093 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3094 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3095 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3096 ResultTy->getNumElements());
3097
3098 Rep = Builder.CreateIntrinsic(
3099 Intrinsic::masked_expandload, ResultTy,
3100 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3101 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3102 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3103 Value *MaskVec =
3104 getX86MaskVec(Builder, CI->getArgOperand(2),
3105 cast<FixedVectorType>(ResultTy)->getNumElements());
3106
3107 Rep = Builder.CreateIntrinsic(
3108 Intrinsic::masked_compressstore, ResultTy,
3109 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3110 } else if (Name.starts_with("avx512.mask.compress.") ||
3111 Name.starts_with("avx512.mask.expand.")) {
3112 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3113
3114 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3115 ResultTy->getNumElements());
3116
3117 bool IsCompress = Name[12] == 'c';
3118 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3119 : Intrinsic::x86_avx512_mask_expand;
3120 Rep = Builder.CreateIntrinsic(
3121 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3122 } else if (Name.starts_with("xop.vpcom")) {
3123 bool IsSigned;
3124 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3125 Name.ends_with("uq"))
3126 IsSigned = false;
3127 else if (Name.ends_with("b") || Name.ends_with("w") ||
3128 Name.ends_with("d") || Name.ends_with("q"))
3129 IsSigned = true;
3130 else
3131 llvm_unreachable("Unknown suffix");
3132
3133 unsigned Imm;
3134 if (CI->arg_size() == 3) {
3135 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3136 } else {
3137 Name = Name.substr(9); // strip off "xop.vpcom"
3138 if (Name.starts_with("lt"))
3139 Imm = 0;
3140 else if (Name.starts_with("le"))
3141 Imm = 1;
3142 else if (Name.starts_with("gt"))
3143 Imm = 2;
3144 else if (Name.starts_with("ge"))
3145 Imm = 3;
3146 else if (Name.starts_with("eq"))
3147 Imm = 4;
3148 else if (Name.starts_with("ne"))
3149 Imm = 5;
3150 else if (Name.starts_with("false"))
3151 Imm = 6;
3152 else if (Name.starts_with("true"))
3153 Imm = 7;
3154 else
3155 llvm_unreachable("Unknown condition");
3156 }
3157
3158 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3159 } else if (Name.starts_with("xop.vpcmov")) {
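// vpcmov is a bitwise select: (Op0 & Sel) | (Op1 & ~Sel).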
3160 Value *Sel = CI->getArgOperand(2);
3161 Value *NotSel = Builder.CreateNot(Sel);
3162 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3163 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3164 Rep = Builder.CreateOr(Sel0, Sel1);
3165 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3166 Name.starts_with("avx512.mask.prol")) {
3167 Rep = upgradeX86Rotate(Builder, *CI, false);
3168 } else if (Name.starts_with("avx512.pror") ||
3169 Name.starts_with("avx512.mask.pror")) {
3170 Rep = upgradeX86Rotate(Builder, *CI, true);
3171 } else if (Name.starts_with("avx512.vpshld.") ||
3172 Name.starts_with("avx512.mask.vpshld") ||
3173 Name.starts_with("avx512.maskz.vpshld")) {
3174 bool ZeroMask = Name[11] == 'z';
3175 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3176 } else if (Name.starts_with("avx512.vpshrd.") ||
3177 Name.starts_with("avx512.mask.vpshrd") ||
3178 Name.starts_with("avx512.maskz.vpshrd")) {
3179 bool ZeroMask = Name[11] == 'z';
3180 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3181 } else if (Name == "sse42.crc32.64.8") {
3182 Value *Trunc0 =
3183 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3184 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3185 {Trunc0, CI->getArgOperand(1)});
3186 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3187 } else if (Name.starts_with("avx.vbroadcast.s") ||
3188 Name.starts_with("avx512.vbroadcast.s")) {
3189 // Replace broadcasts with a series of insertelements.
3190 auto *VecTy = cast<FixedVectorType>(CI->getType());
3191 Type *EltTy = VecTy->getElementType();
3192 unsigned EltNum = VecTy->getNumElements();
3193 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3194 Type *I32Ty = Type::getInt32Ty(C);
3195 Rep = PoisonValue::get(VecTy);
3196 for (unsigned I = 0; I < EltNum; ++I)
3197 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3198 } else if (Name.starts_with("sse41.pmovsx") ||
3199 Name.starts_with("sse41.pmovzx") ||
3200 Name.starts_with("avx2.pmovsx") ||
3201 Name.starts_with("avx2.pmovzx") ||
3202 Name.starts_with("avx512.mask.pmovsx") ||
3203 Name.starts_with("avx512.mask.pmovzx")) {
3204 auto *DstTy = cast<FixedVectorType>(CI->getType());
3205 unsigned NumDstElts = DstTy->getNumElements();
3206
3207 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3208 SmallVector<int, 8> ShuffleMask(NumDstElts);
3209 for (unsigned i = 0; i != NumDstElts; ++i)
3210 ShuffleMask[i] = i;
3211
3212 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3213
3214 bool DoSext = Name.contains("pmovsx");
3215 Rep =
3216 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3217 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3218 if (CI->arg_size() == 3)
3219 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3220 CI->getArgOperand(1));
3221 } else if (Name == "avx512.mask.pmov.qd.256" ||
3222 Name == "avx512.mask.pmov.qd.512" ||
3223 Name == "avx512.mask.pmov.wb.256" ||
3224 Name == "avx512.mask.pmov.wb.512") {
3225 Type *Ty = CI->getArgOperand(1)->getType();
3226 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3227 Rep =
3228 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3229 } else if (Name.starts_with("avx.vbroadcastf128") ||
3230 Name == "avx2.vbroadcasti128") {
3231 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3232 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3233 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3234 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3235 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3236 if (NumSrcElts == 2)
3237 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3238 else
3239 Rep = Builder.CreateShuffleVector(Load,
3240 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3241 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3242 Name.starts_with("avx512.mask.shuf.f")) {
3243 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3244 Type *VT = CI->getType();
3245 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3246 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3247 unsigned ControlBitsMask = NumLanes - 1;
3248 unsigned NumControlBits = NumLanes / 2;
3249 SmallVector<int, 8> ShuffleMask(0);
3250
3251 for (unsigned l = 0; l != NumLanes; ++l) {
3252 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3253 // We actually need the other source.
3254 if (l >= NumLanes / 2)
3255 LaneMask += NumLanes;
3256 for (unsigned i = 0; i != NumElementsInLane; ++i)
3257 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3258 }
3259 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3260 CI->getArgOperand(1), ShuffleMask);
3261 Rep =
3262 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3263 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3264 Name.starts_with("avx512.mask.broadcasti")) {
3265 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3266 ->getNumElements();
3267 unsigned NumDstElts =
3268 cast<FixedVectorType>(CI->getType())->getNumElements();
3269
3270 SmallVector<int, 8> ShuffleMask(NumDstElts);
3271 for (unsigned i = 0; i != NumDstElts; ++i)
3272 ShuffleMask[i] = i % NumSrcElts;
3273
3274 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3275 CI->getArgOperand(0), ShuffleMask);
3276 Rep =
3277 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3278 } else if (Name.starts_with("avx2.pbroadcast") ||
3279 Name.starts_with("avx2.vbroadcast") ||
3280 Name.starts_with("avx512.pbroadcast") ||
3281 Name.starts_with("avx512.mask.broadcast.s")) {
3282 // Replace vp?broadcasts with a vector shuffle.
3283 Value *Op = CI->getArgOperand(0);
3284 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3285 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3286 SmallVector<int, 8> M;
3287 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3288 Rep = Builder.CreateShuffleVector(Op, M);
3289
3290 if (CI->arg_size() == 3)
3291 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3292 CI->getArgOperand(1));
3293 } else if (Name.starts_with("sse2.padds.") ||
3294 Name.starts_with("avx2.padds.") ||
3295 Name.starts_with("avx512.padds.") ||
3296 Name.starts_with("avx512.mask.padds.")) {
3297 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3298 } else if (Name.starts_with("sse2.psubs.") ||
3299 Name.starts_with("avx2.psubs.") ||
3300 Name.starts_with("avx512.psubs.") ||
3301 Name.starts_with("avx512.mask.psubs.")) {
3302 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3303 } else if (Name.starts_with("sse2.paddus.") ||
3304 Name.starts_with("avx2.paddus.") ||
3305 Name.starts_with("avx512.mask.paddus.")) {
3306 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3307 } else if (Name.starts_with("sse2.psubus.") ||
3308 Name.starts_with("avx2.psubus.") ||
3309 Name.starts_with("avx512.mask.psubus.")) {
3310 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3311 } else if (Name.starts_with("avx512.mask.palignr.")) {
3312 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3313 CI->getArgOperand(1), CI->getArgOperand(2),
3314 CI->getArgOperand(3), CI->getArgOperand(4),
3315 false);
3316 } else if (Name.starts_with("avx512.mask.valign.")) {
3317 Rep = upgradeX86ALIGNIntrinsics(
3318 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3319 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3320 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3321 // 128/256-bit shift left specified in bits.
3322 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3323 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3324 Shift / 8); // Shift is in bits.
3325 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3326 // 128/256-bit shift right specified in bits.
3327 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3328 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3329 Shift / 8); // Shift is in bits.
3330 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3331 Name == "avx512.psll.dq.512") {
3332 // 128/256/512-bit shift left specified in bytes.
3333 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3334 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3335 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3336 Name == "avx512.psrl.dq.512") {
3337 // 128/256/512-bit shift right specified in bytes.
3338 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3339 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3340 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3341 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3342 Name.starts_with("avx2.pblendd.")) {
3343 Value *Op0 = CI->getArgOperand(0);
3344 Value *Op1 = CI->getArgOperand(1);
3345 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3346 auto *VecTy = cast<FixedVectorType>(CI->getType());
3347 unsigned NumElts = VecTy->getNumElements();
3348
3349 SmallVector<int, 16> Idxs(NumElts);
3350 for (unsigned i = 0; i != NumElts; ++i)
3351 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
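// Each immediate bit selects an element from the second source, e.g.
// pblendw with Imm = 0x0F on <8 x i16> gives the mask
// <8, 9, 10, 11, 4, 5, 6, 7>.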
3352
3353 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3354 } else if (Name.starts_with("avx.vinsertf128.") ||
3355 Name == "avx2.vinserti128" ||
3356 Name.starts_with("avx512.mask.insert")) {
3357 Value *Op0 = CI->getArgOperand(0);
3358 Value *Op1 = CI->getArgOperand(1);
3359 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3360 unsigned DstNumElts =
3361 cast<FixedVectorType>(CI->getType())->getNumElements();
3362 unsigned SrcNumElts =
3363 cast<FixedVectorType>(Op1->getType())->getNumElements();
3364 unsigned Scale = DstNumElts / SrcNumElts;
3365
3366 // Mask off the high bits of the immediate value; hardware ignores those.
3367 Imm = Imm % Scale;
3368
3369 // Extend the second operand into a vector the size of the destination.
3370 SmallVector<int, 8> Idxs(DstNumElts);
3371 for (unsigned i = 0; i != SrcNumElts; ++i)
3372 Idxs[i] = i;
3373 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3374 Idxs[i] = SrcNumElts;
3375 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3376
3377 // Insert the second operand into the first operand.
3378
3379 // Note that there is no guarantee that instruction lowering will actually
3380 // produce a vinsertf128 instruction for the created shuffles. In
3381 // particular, the 0 immediate case involves no lane changes, so it can
3382 // be handled as a blend.
3383
3384 // Example of shuffle mask for 32-bit elements:
3385 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3386 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3387
3388 // First fill with the identity mask.
3389 for (unsigned i = 0; i != DstNumElts; ++i)
3390 Idxs[i] = i;
3391 // Then replace the elements where we need to insert.
3392 for (unsigned i = 0; i != SrcNumElts; ++i)
3393 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3394 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3395
3396 // If the intrinsic has a mask operand, handle that.
3397 if (CI->arg_size() == 5)
3398 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3399 CI->getArgOperand(3));
3400 } else if (Name.starts_with("avx.vextractf128.") ||
3401 Name == "avx2.vextracti128" ||
3402 Name.starts_with("avx512.mask.vextract")) {
3403 Value *Op0 = CI->getArgOperand(0);
3404 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3405 unsigned DstNumElts =
3406 cast<FixedVectorType>(CI->getType())->getNumElements();
3407 unsigned SrcNumElts =
3408 cast<FixedVectorType>(Op0->getType())->getNumElements();
3409 unsigned Scale = SrcNumElts / DstNumElts;
3410
3411 // Mask off the high bits of the immediate value; hardware ignores those.
3412 Imm = Imm % Scale;
3413
3414 // Get indexes for the subvector of the input vector.
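// e.g. vextractf128 with Imm = 1 from a <8 x float> source selects
// elements <4, 5, 6, 7>.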
3415 SmallVector<int, 8> Idxs(DstNumElts);
3416 for (unsigned i = 0; i != DstNumElts; ++i) {
3417 Idxs[i] = i + (Imm * DstNumElts);
3418 }
3419 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3420
3421 // If the intrinsic has a mask operand, handle that.
3422 if (CI->arg_size() == 4)
3423 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3424 CI->getArgOperand(2));
3425 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3426 Name.starts_with("avx512.mask.perm.di.")) {
3427 Value *Op0 = CI->getArgOperand(0);
3428 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3429 auto *VecTy = cast<FixedVectorType>(CI->getType());
3430 unsigned NumElts = VecTy->getNumElements();
3431
3432 SmallVector<int, 8> Idxs(NumElts);
3433 for (unsigned i = 0; i != NumElts; ++i)
3434 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3435
3436 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3437
3438 if (CI->arg_size() == 4)
3439 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3440 CI->getArgOperand(2));
3441 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3442 // The immediate permute control byte looks like this:
3443 // [1:0] - select 128 bits from sources for low half of destination
3444 // [2] - ignore
3445 // [3] - zero low half of destination
3446 // [5:4] - select 128 bits from sources for high half of destination
3447 // [6] - ignore
3448 // [7] - zero high half of destination
3449
3450 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3451
3452 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3453 unsigned HalfSize = NumElts / 2;
3454 SmallVector<int, 8> ShuffleMask(NumElts);
3455
3456 // Determine which operand(s) are actually in use for this instruction.
3457 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3458 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3459
3460 // If needed, replace operands based on zero mask.
3461 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3462 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3463
3464 // Permute low half of result.
3465 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3466 for (unsigned i = 0; i < HalfSize; ++i)
3467 ShuffleMask[i] = StartIndex + i;
3468
3469 // Permute high half of result.
3470 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3471 for (unsigned i = 0; i < HalfSize; ++i)
3472 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3473
3474 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3475
3476 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3477 Name.starts_with("avx512.mask.vpermil.p") ||
3478 Name.starts_with("avx512.mask.pshuf.d.")) {
3479 Value *Op0 = CI->getArgOperand(0);
3480 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3481 auto *VecTy = cast<FixedVectorType>(CI->getType());
3482 unsigned NumElts = VecTy->getNumElements();
3483 // Calculate the size of each index in the immediate.
3484 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3485 unsigned IdxMask = ((1 << IdxSize) - 1);
3486
3487 SmallVector<int, 8> Idxs(NumElts);
3488 // Look up the bits for this element, wrapping around the immediate every
3489 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3490 // to offset by the first index of each group.
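// e.g. pshufd with Imm = 0x1B on <4 x i32> yields the reversal mask
// <3, 2, 1, 0>.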
3491 for (unsigned i = 0; i != NumElts; ++i)
3492 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3493
3494 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3495
3496 if (CI->arg_size() == 4)
3497 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3498 CI->getArgOperand(2));
3499 } else if (Name == "sse2.pshufl.w" ||
3500 Name.starts_with("avx512.mask.pshufl.w.")) {
3501 Value *Op0 = CI->getArgOperand(0);
3502 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3503 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3504
3505 SmallVector<int, 16> Idxs(NumElts);
3506 for (unsigned l = 0; l != NumElts; l += 8) {
3507 for (unsigned i = 0; i != 4; ++i)
3508 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3509 for (unsigned i = 4; i != 8; ++i)
3510 Idxs[i + l] = i + l;
3511 }
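// e.g. pshuflw with Imm = 0x1B reverses the low four words of each 128-bit
// lane: <3, 2, 1, 0, 4, 5, 6, 7>.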
3512
3513 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3514
3515 if (CI->arg_size() == 4)
3516 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3517 CI->getArgOperand(2));
3518 } else if (Name == "sse2.pshufh.w" ||
3519 Name.starts_with("avx512.mask.pshufh.w.")) {
3520 Value *Op0 = CI->getArgOperand(0);
3521 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3522 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3523
3524 SmallVector<int, 16> Idxs(NumElts);
3525 for (unsigned l = 0; l != NumElts; l += 8) {
3526 for (unsigned i = 0; i != 4; ++i)
3527 Idxs[i + l] = i + l;
3528 for (unsigned i = 0; i != 4; ++i)
3529 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3530 }
3531
3532 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3533
3534 if (CI->arg_size() == 4)
3535 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3536 CI->getArgOperand(2));
3537 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3538 Value *Op0 = CI->getArgOperand(0);
3539 Value *Op1 = CI->getArgOperand(1);
3540 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3541 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3542
3543 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3544 unsigned HalfLaneElts = NumLaneElts / 2;
3545
3546 SmallVector<int, 16> Idxs(NumElts);
3547 for (unsigned i = 0; i != NumElts; ++i) {
3548 // Base index is the starting element of the lane.
3549 Idxs[i] = i - (i % NumLaneElts);
3550 // If we are halfway through the lane, switch to the other source.
3551 if ((i % NumLaneElts) >= HalfLaneElts)
3552 Idxs[i] += NumElts;
3553 // Now select the specific element by adding HalfLaneElts bits from
3554 // the immediate, wrapping around the immediate every 8 bits.
3555 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3556 }
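// e.g. shufps with Imm = 0x1B on two <4 x float> sources yields the mask
// <3, 2, 5, 4>: two elements from the first source, then two from the second.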
3557
3558 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3559
3560 Rep =
3561 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3562 } else if (Name.starts_with("avx512.mask.movddup") ||
3563 Name.starts_with("avx512.mask.movshdup") ||
3564 Name.starts_with("avx512.mask.movsldup")) {
3565 Value *Op0 = CI->getArgOperand(0);
3566 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3567 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3568
3569 unsigned Offset = 0;
3570 if (Name.starts_with("avx512.mask.movshdup."))
3571 Offset = 1;
3572
3573 SmallVector<int, 16> Idxs(NumElts);
3574 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3575 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3576 Idxs[i + l + 0] = i + l + Offset;
3577 Idxs[i + l + 1] = i + l + Offset;
3578 }
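// Offset 0 duplicates the even-indexed elements (movddup/movsldup), giving
// <0, 0, 2, 2, ...>; Offset 1 duplicates the odd ones (movshdup):
// <1, 1, 3, 3, ...>.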
3579
3580 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3581
3582 Rep =
3583 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3584 } else if (Name.starts_with("avx512.mask.punpckl") ||
3585 Name.starts_with("avx512.mask.unpckl.")) {
3586 Value *Op0 = CI->getArgOperand(0);
3587 Value *Op1 = CI->getArgOperand(1);
3588 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3589 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3590
3591 SmallVector<int, 64> Idxs(NumElts);
3592 for (int l = 0; l != NumElts; l += NumLaneElts)
3593 for (int i = 0; i != NumLaneElts; ++i)
3594 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
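// e.g. punpckldq on <4 x i32> operands interleaves the low halves of each
// lane: mask <0, 4, 1, 5>.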
3595
3596 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3597
3598 Rep =
3599 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3600 } else if (Name.starts_with("avx512.mask.punpckh") ||
3601 Name.starts_with("avx512.mask.unpckh.")) {
3602 Value *Op0 = CI->getArgOperand(0);
3603 Value *Op1 = CI->getArgOperand(1);
3604 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3605 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3606
3607 SmallVector<int, 64> Idxs(NumElts);
3608 for (int l = 0; l != NumElts; l += NumLaneElts)
3609 for (int i = 0; i != NumLaneElts; ++i)
3610 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3611
3612 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3613
3614 Rep =
3615 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3616 } else if (Name.starts_with("avx512.mask.and.") ||
3617 Name.starts_with("avx512.mask.pand.")) {
3618 VectorType *FTy = cast<VectorType>(CI->getType());
3619 VectorType *ITy = VectorType::getInteger(FTy);
3620 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3621 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3622 Rep = Builder.CreateBitCast(Rep, FTy);
3623 Rep =
3624 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3625 } else if (Name.starts_with("avx512.mask.andn.") ||
3626 Name.starts_with("avx512.mask.pandn.")) {
3627 VectorType *FTy = cast<VectorType>(CI->getType());
3628 VectorType *ITy = VectorType::getInteger(FTy);
3629 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3630 Rep = Builder.CreateAnd(Rep,
3631 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3632 Rep = Builder.CreateBitCast(Rep, FTy);
3633 Rep =
3634 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3635 } else if (Name.starts_with("avx512.mask.or.") ||
3636 Name.starts_with("avx512.mask.por.")) {
3637 VectorType *FTy = cast<VectorType>(CI->getType());
3638 VectorType *ITy = VectorType::getInteger(FTy);
3639 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3640 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3641 Rep = Builder.CreateBitCast(Rep, FTy);
3642 Rep =
3643 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3644 } else if (Name.starts_with("avx512.mask.xor.") ||
3645 Name.starts_with("avx512.mask.pxor.")) {
3646 VectorType *FTy = cast<VectorType>(CI->getType());
3647 VectorType *ITy = VectorType::getInteger(FTy);
3648 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3649 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3650 Rep = Builder.CreateBitCast(Rep, FTy);
3651 Rep =
3652 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3653 } else if (Name.starts_with("avx512.mask.padd.")) {
3654 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3655 Rep =
3656 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3657 } else if (Name.starts_with("avx512.mask.psub.")) {
3658 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3659 Rep =
3660 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3661 } else if (Name.starts_with("avx512.mask.pmull.")) {
3662 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3663 Rep =
3664 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3665 } else if (Name.starts_with("avx512.mask.add.p")) {
3666 if (Name.ends_with(".512")) {
3667 Intrinsic::ID IID;
3668 if (Name[17] == 's')
3669 IID = Intrinsic::x86_avx512_add_ps_512;
3670 else
3671 IID = Intrinsic::x86_avx512_add_pd_512;
3672
3673 Rep = Builder.CreateIntrinsic(
3674 IID,
3675 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3676 } else {
3677 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3678 }
3679 Rep =
3680 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3681 } else if (Name.starts_with("avx512.mask.div.p")) {
3682 if (Name.ends_with(".512")) {
3683 Intrinsic::ID IID;
3684 if (Name[17] == 's')
3685 IID = Intrinsic::x86_avx512_div_ps_512;
3686 else
3687 IID = Intrinsic::x86_avx512_div_pd_512;
3688
3689 Rep = Builder.CreateIntrinsic(
3690 IID,
3691 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3692 } else {
3693 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3694 }
3695 Rep =
3696 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3697 } else if (Name.starts_with("avx512.mask.mul.p")) {
3698 if (Name.ends_with(".512")) {
3699 Intrinsic::ID IID;
3700 if (Name[17] == 's')
3701 IID = Intrinsic::x86_avx512_mul_ps_512;
3702 else
3703 IID = Intrinsic::x86_avx512_mul_pd_512;
3704
3705 Rep = Builder.CreateIntrinsic(
3706 IID,
3707 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3708 } else {
3709 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3710 }
3711 Rep =
3712 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3713 } else if (Name.starts_with("avx512.mask.sub.p")) {
3714 if (Name.ends_with(".512")) {
3715 Intrinsic::ID IID;
3716 if (Name[17] == 's')
3717 IID = Intrinsic::x86_avx512_sub_ps_512;
3718 else
3719 IID = Intrinsic::x86_avx512_sub_pd_512;
3720
3721 Rep = Builder.CreateIntrinsic(
3722 IID,
3723 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3724 } else {
3725 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3726 }
3727 Rep =
3728 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3729 } else if ((Name.starts_with("avx512.mask.max.p") ||
3730 Name.starts_with("avx512.mask.min.p")) &&
3731 Name.drop_front(18) == ".512") {
3732 bool IsDouble = Name[17] == 'd';
3733 bool IsMin = Name[13] == 'i';
3734 static const Intrinsic::ID MinMaxTbl[2][2] = {
3735 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3736 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3737 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3738
3739 Rep = Builder.CreateIntrinsic(
3740 IID,
3741 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3742 Rep =
3743 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3744 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3745 Rep =
3746 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3747 {CI->getArgOperand(0), Builder.getInt1(false)});
3748 Rep =
3749 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3750 } else if (Name.starts_with("avx512.mask.psll")) {
3751 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3752 bool IsVariable = Name[16] == 'v';
3753 char Size = Name[16] == '.' ? Name[17]
3754 : Name[17] == '.' ? Name[18]
3755 : Name[18] == '.' ? Name[19]
3756 : Name[20];
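// Size is the element-type letter after the next '.', e.g. 'd' in
// "avx512.mask.psll.d.128" or 'h' in "avx512.mask.psllv16.hi".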
3757
3758 Intrinsic::ID IID;
3759 if (IsVariable && Name[17] != '.') {
3760 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3761 IID = Intrinsic::x86_avx2_psllv_q;
3762 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3763 IID = Intrinsic::x86_avx2_psllv_q_256;
3764 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3765 IID = Intrinsic::x86_avx2_psllv_d;
3766 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3767 IID = Intrinsic::x86_avx2_psllv_d_256;
3768 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3769 IID = Intrinsic::x86_avx512_psllv_w_128;
3770 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3771 IID = Intrinsic::x86_avx512_psllv_w_256;
3772 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3773 IID = Intrinsic::x86_avx512_psllv_w_512;
3774 else
3775 llvm_unreachable("Unexpected size");
3776 } else if (Name.ends_with(".128")) {
3777 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3778 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3779 : Intrinsic::x86_sse2_psll_d;
3780 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3781 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3782 : Intrinsic::x86_sse2_psll_q;
3783 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3784 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3785 : Intrinsic::x86_sse2_psll_w;
3786 else
3787 llvm_unreachable("Unexpected size");
3788 } else if (Name.ends_with(".256")) {
3789 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3790 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3791 : Intrinsic::x86_avx2_psll_d;
3792 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3793 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3794 : Intrinsic::x86_avx2_psll_q;
3795 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3796 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3797 : Intrinsic::x86_avx2_psll_w;
3798 else
3799 llvm_unreachable("Unexpected size");
3800 } else {
3801 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3802 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3803 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3804 : Intrinsic::x86_avx512_psll_d_512;
3805 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3806 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3807 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3808 : Intrinsic::x86_avx512_psll_q_512;
3809 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3810 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3811 : Intrinsic::x86_avx512_psll_w_512;
3812 else
3813 llvm_unreachable("Unexpected size");
3814 }
3815
3816 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3817 } else if (Name.starts_with("avx512.mask.psrl")) {
3818 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3819 bool IsVariable = Name[16] == 'v';
3820 char Size = Name[16] == '.' ? Name[17]
3821 : Name[17] == '.' ? Name[18]
3822 : Name[18] == '.' ? Name[19]
3823 : Name[20];
3824
3825 Intrinsic::ID IID;
3826 if (IsVariable && Name[17] != '.') {
3827 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3828 IID = Intrinsic::x86_avx2_psrlv_q;
3829 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3830 IID = Intrinsic::x86_avx2_psrlv_q_256;
3831 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3832 IID = Intrinsic::x86_avx2_psrlv_d;
3833 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3834 IID = Intrinsic::x86_avx2_psrlv_d_256;
3835 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3836 IID = Intrinsic::x86_avx512_psrlv_w_128;
3837 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3838 IID = Intrinsic::x86_avx512_psrlv_w_256;
3839 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3840 IID = Intrinsic::x86_avx512_psrlv_w_512;
3841 else
3842 llvm_unreachable("Unexpected size");
3843 } else if (Name.ends_with(".128")) {
3844 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3845 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3846 : Intrinsic::x86_sse2_psrl_d;
3847 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3848 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3849 : Intrinsic::x86_sse2_psrl_q;
3850 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3851 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3852 : Intrinsic::x86_sse2_psrl_w;
3853 else
3854 llvm_unreachable("Unexpected size");
3855 } else if (Name.ends_with(".256")) {
3856 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3857 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3858 : Intrinsic::x86_avx2_psrl_d;
3859 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3860 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3861 : Intrinsic::x86_avx2_psrl_q;
3862 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3863 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3864 : Intrinsic::x86_avx2_psrl_w;
3865 else
3866 llvm_unreachable("Unexpected size");
3867 } else {
3868 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3869 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3870 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3871 : Intrinsic::x86_avx512_psrl_d_512;
3872 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3873 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3874 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3875 : Intrinsic::x86_avx512_psrl_q_512;
3876 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3877 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3878 : Intrinsic::x86_avx512_psrl_w_512;
3879 else
3880 llvm_unreachable("Unexpected size");
3881 }
3882
3883 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3884 } else if (Name.starts_with("avx512.mask.psra")) {
3885 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3886 bool IsVariable = Name[16] == 'v';
3887 char Size = Name[16] == '.' ? Name[17]
3888 : Name[17] == '.' ? Name[18]
3889 : Name[18] == '.' ? Name[19]
3890 : Name[20];
3891
3892 Intrinsic::ID IID;
3893 if (IsVariable && Name[17] != '.') {
3894 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3895 IID = Intrinsic::x86_avx2_psrav_d;
3896 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3897 IID = Intrinsic::x86_avx2_psrav_d_256;
3898 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3899 IID = Intrinsic::x86_avx512_psrav_w_128;
3900 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3901 IID = Intrinsic::x86_avx512_psrav_w_256;
3902 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3903 IID = Intrinsic::x86_avx512_psrav_w_512;
3904 else
3905 llvm_unreachable("Unexpected size");
3906 } else if (Name.ends_with(".128")) {
3907 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3908 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3909 : Intrinsic::x86_sse2_psra_d;
3910 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3911 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3912 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3913 : Intrinsic::x86_avx512_psra_q_128;
3914 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3915 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3916 : Intrinsic::x86_sse2_psra_w;
3917 else
3918 llvm_unreachable("Unexpected size");
3919 } else if (Name.ends_with(".256")) {
3920 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3921 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3922 : Intrinsic::x86_avx2_psra_d;
3923 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3924 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3925 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3926 : Intrinsic::x86_avx512_psra_q_256;
3927 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3928 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3929 : Intrinsic::x86_avx2_psra_w;
3930 else
3931 llvm_unreachable("Unexpected size");
3932 } else {
3933 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3934 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3935 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3936 : Intrinsic::x86_avx512_psra_d_512;
3937 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3938 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3939 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3940 : Intrinsic::x86_avx512_psra_q_512;
3941 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3942 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3943 : Intrinsic::x86_avx512_psra_w_512;
3944 else
3945 llvm_unreachable("Unexpected size");
3946 }
3947
3948 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3949 } else if (Name.starts_with("avx512.mask.move.s")) {
3950 Rep = upgradeMaskedMove(Builder, *CI);
3951 } else if (Name.starts_with("avx512.cvtmask2")) {
3952 Rep = upgradeMaskToInt(Builder, *CI);
3953 } else if (Name.ends_with(".movntdqa")) {
3954 MDNode *Node = MDNode::get(
3955 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3956
3957 LoadInst *LI = Builder.CreateAlignedLoad(
3958 CI->getType(), CI->getArgOperand(0),
3959 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3960 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3961 Rep = LI;
3962 } else if (Name.starts_with("fma.vfmadd.") ||
3963 Name.starts_with("fma.vfmsub.") ||
3964 Name.starts_with("fma.vfnmadd.") ||
3965 Name.starts_with("fma.vfnmsub.")) {
3966 bool NegMul = Name[6] == 'n';
3967 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3968 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3969
3970 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3971 CI->getArgOperand(2)};
3972
3973 if (IsScalar) {
3974 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3975 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3976 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3977 }
3978
3979 if (NegMul && !IsScalar)
3980 Ops[0] = Builder.CreateFNeg(Ops[0]);
3981 if (NegMul && IsScalar)
3982 Ops[1] = Builder.CreateFNeg(Ops[1]);
3983 if (NegAcc)
3984 Ops[2] = Builder.CreateFNeg(Ops[2]);
3985
3986 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3987
3988 if (IsScalar)
3989 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3990 } else if (Name.starts_with("fma4.vfmadd.s")) {
3991 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3992 CI->getArgOperand(2)};
3993
3994 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3995 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3996 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3997
3998 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3999
4000 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4001 Rep, (uint64_t)0);
4002 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4003 Name.starts_with("avx512.maskz.vfmadd.s") ||
4004 Name.starts_with("avx512.mask3.vfmadd.s") ||
4005 Name.starts_with("avx512.mask3.vfmsub.s") ||
4006 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4007 bool IsMask3 = Name[11] == '3';
4008 bool IsMaskZ = Name[11] == 'z';
4009 // Drop the "avx512.mask." to make it easier.
4010 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4011 bool NegMul = Name[2] == 'n';
4012 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4013
4014 Value *A = CI->getArgOperand(0);
4015 Value *B = CI->getArgOperand(1);
4016 Value *C = CI->getArgOperand(2);
4017
4018 if (NegMul && (IsMask3 || IsMaskZ))
4019 A = Builder.CreateFNeg(A);
4020 if (NegMul && !(IsMask3 || IsMaskZ))
4021 B = Builder.CreateFNeg(B);
4022 if (NegAcc)
4023 C = Builder.CreateFNeg(C);
4024
4025 A = Builder.CreateExtractElement(A, (uint64_t)0);
4026 B = Builder.CreateExtractElement(B, (uint64_t)0);
4027 C = Builder.CreateExtractElement(C, (uint64_t)0);
4028
4029 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4030 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4031 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4032
4033 Intrinsic::ID IID;
4034 if (Name.back() == 'd')
4035 IID = Intrinsic::x86_avx512_vfmadd_f64;
4036 else
4037 IID = Intrinsic::x86_avx512_vfmadd_f32;
4038 Rep = Builder.CreateIntrinsic(IID, Ops);
4039 } else {
4040 Rep = Builder.CreateFMA(A, B, C);
4041 }
4042
4043 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4044 : IsMask3 ? C
4045 : A;
4046
4047 // For Mask3 with NegAcc, we need to create a new extractelement that
4048 // avoids the negation above.
4049 if (NegAcc && IsMask3)
4050 PassThru =
4051 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4052
4053 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4054 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4055 (uint64_t)0);
4056 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4057 Name.starts_with("avx512.mask.vfnmadd.p") ||
4058 Name.starts_with("avx512.mask.vfnmsub.p") ||
4059 Name.starts_with("avx512.mask3.vfmadd.p") ||
4060 Name.starts_with("avx512.mask3.vfmsub.p") ||
4061 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4062 Name.starts_with("avx512.maskz.vfmadd.p")) {
4063 bool IsMask3 = Name[11] == '3';
4064 bool IsMaskZ = Name[11] == 'z';
4065 // Drop the "avx512.mask." to make it easier.
4066 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4067 bool NegMul = Name[2] == 'n';
4068 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4069
4070 Value *A = CI->getArgOperand(0);
4071 Value *B = CI->getArgOperand(1);
4072 Value *C = CI->getArgOperand(2);
4073
4074 if (NegMul && (IsMask3 || IsMaskZ))
4075 A = Builder.CreateFNeg(A);
4076 if (NegMul && !(IsMask3 || IsMaskZ))
4077 B = Builder.CreateFNeg(B);
4078 if (NegAcc)
4079 C = Builder.CreateFNeg(C);
4080
4081 if (CI->arg_size() == 5 &&
4082 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4083 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4084 Intrinsic::ID IID;
4086 // Check the character before ".512" in the string.
4086 if (Name[Name.size() - 5] == 's')
4087 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4088 else
4089 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4090
4091 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4092 } else {
4093 Rep = Builder.CreateFMA(A, B, C);
4094 }
4095
4096 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4097 : IsMask3 ? CI->getArgOperand(2)
4098 : CI->getArgOperand(0);
4099
4100 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4101 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4102 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4103 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4104 Intrinsic::ID IID;
4105 if (VecWidth == 128 && EltWidth == 32)
4106 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4107 else if (VecWidth == 256 && EltWidth == 32)
4108 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4109 else if (VecWidth == 128 && EltWidth == 64)
4110 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4111 else if (VecWidth == 256 && EltWidth == 64)
4112 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4113 else
4114 llvm_unreachable("Unexpected intrinsic");
4115
4116 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4117 CI->getArgOperand(2)};
4118 Ops[2] = Builder.CreateFNeg(Ops[2]);
4119 Rep = Builder.CreateIntrinsic(IID, Ops);
4120 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4121 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4122 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4123 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4124 bool IsMask3 = Name[11] == '3';
4125 bool IsMaskZ = Name[11] == 'z';
4126 // Drop the "avx512.mask." to make it easier.
4127 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4128 bool IsSubAdd = Name[3] == 's';
4129 if (CI->arg_size() == 5) {
4130 Intrinsic::ID IID;
4131 // Check the character before ".512" in the string.
4132 if (Name[Name.size() - 5] == 's')
4133 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4134 else
4135 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4136
4137 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4138 CI->getArgOperand(2), CI->getArgOperand(4)};
4139 if (IsSubAdd)
4140 Ops[2] = Builder.CreateFNeg(Ops[2]);
4141
4142 Rep = Builder.CreateIntrinsic(IID, Ops);
4143 } else {
4144 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4145
4146 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4147 CI->getArgOperand(2)};
4148
4149 Function *FMA = Intrinsic::getOrInsertDeclaration(
4150 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4151 Value *Odd = Builder.CreateCall(FMA, Ops);
4152 Ops[2] = Builder.CreateFNeg(Ops[2]);
4153 Value *Even = Builder.CreateCall(FMA, Ops);
4154
4155 if (IsSubAdd)
4156 std::swap(Even, Odd);
4157
4158 SmallVector<int, 32> Idxs(NumElts);
4159 for (int i = 0; i != NumElts; ++i)
4160 Idxs[i] = i + (i % 2) * NumElts;
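// Even result elements take the negated-addend FMA (vfmaddsub subtracts in
// even lanes) and odd elements take the plain FMA; IsSubAdd swaps the two.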
4161
4162 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4163 }
4164
4165 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4166 : IsMask3 ? CI->getArgOperand(2)
4167 : CI->getArgOperand(0);
4168
4169 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4170 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4171 Name.starts_with("avx512.maskz.pternlog.")) {
4172 bool ZeroMask = Name[11] == 'z';
4173 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4174 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4175 Intrinsic::ID IID;
4176 if (VecWidth == 128 && EltWidth == 32)
4177 IID = Intrinsic::x86_avx512_pternlog_d_128;
4178 else if (VecWidth == 256 && EltWidth == 32)
4179 IID = Intrinsic::x86_avx512_pternlog_d_256;
4180 else if (VecWidth == 512 && EltWidth == 32)
4181 IID = Intrinsic::x86_avx512_pternlog_d_512;
4182 else if (VecWidth == 128 && EltWidth == 64)
4183 IID = Intrinsic::x86_avx512_pternlog_q_128;
4184 else if (VecWidth == 256 && EltWidth == 64)
4185 IID = Intrinsic::x86_avx512_pternlog_q_256;
4186 else if (VecWidth == 512 && EltWidth == 64)
4187 IID = Intrinsic::x86_avx512_pternlog_q_512;
4188 else
4189 llvm_unreachable("Unexpected intrinsic");
4190
4191 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4192 CI->getArgOperand(2), CI->getArgOperand(3)};
4193 Rep = Builder.CreateIntrinsic(IID, Args);
4194 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4195 : CI->getArgOperand(0);
4196 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4197 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4198 Name.starts_with("avx512.maskz.vpmadd52")) {
4199 bool ZeroMask = Name[11] == 'z';
4200 bool High = Name[20] == 'h' || Name[21] == 'h';
4201 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4202 Intrinsic::ID IID;
4203 if (VecWidth == 128 && !High)
4204 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4205 else if (VecWidth == 256 && !High)
4206 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4207 else if (VecWidth == 512 && !High)
4208 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4209 else if (VecWidth == 128 && High)
4210 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4211 else if (VecWidth == 256 && High)
4212 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4213 else if (VecWidth == 512 && High)
4214 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4215 else
4216 llvm_unreachable("Unexpected intrinsic");
4217
4218 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4219 CI->getArgOperand(2)};
4220 Rep = Builder.CreateIntrinsic(IID, Args);
4221 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4222 : CI->getArgOperand(0);
4223 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4224 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4225 Name.starts_with("avx512.mask.vpermt2var.") ||
4226 Name.starts_with("avx512.maskz.vpermt2var.")) {
4227 bool ZeroMask = Name[11] == 'z';
4228 bool IndexForm = Name[17] == 'i';
4229 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4230 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4231 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4232 Name.starts_with("avx512.mask.vpdpbusds.") ||
4233 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4234 bool ZeroMask = Name[11] == 'z';
4235 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4236 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4237 Intrinsic::ID IID;
4238 if (VecWidth == 128 && !IsSaturating)
4239 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4240 else if (VecWidth == 256 && !IsSaturating)
4241 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4242 else if (VecWidth == 512 && !IsSaturating)
4243 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4244 else if (VecWidth == 128 && IsSaturating)
4245 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4246 else if (VecWidth == 256 && IsSaturating)
4247 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4248 else if (VecWidth == 512 && IsSaturating)
4249 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4250 else
4251 llvm_unreachable("Unexpected intrinsic");
4252
4253 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4254 CI->getArgOperand(2)};
4255
4256 // Input argument types were incorrectly set to vectors of i32 before, but
4257 // they should be vectors of i8. Insert a bitcast when encountering the old
4258 // types.
4259 if (Args[1]->getType()->isVectorTy() &&
4260 cast<VectorType>(Args[1]->getType())
4261 ->getElementType()
4262 ->isIntegerTy(32) &&
4263 Args[2]->getType()->isVectorTy() &&
4264 cast<VectorType>(Args[2]->getType())
4265 ->getElementType()
4266 ->isIntegerTy(32)) {
4267 Type *NewArgType = nullptr;
4268 if (VecWidth == 128)
4269 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4270 else if (VecWidth == 256)
4271 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4272 else if (VecWidth == 512)
4273 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4274 else
4275 llvm_unreachable("Unexpected vector bit width");
4276
4277 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4278 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4279 }
4280
4281 Rep = Builder.CreateIntrinsic(IID, Args);
4282 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4283 : CI->getArgOperand(0);
4284 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4285 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4286 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4287 Name.starts_with("avx512.mask.vpdpwssds.") ||
4288 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4289 bool ZeroMask = Name[11] == 'z';
4290 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4291 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4292 Intrinsic::ID IID;
4293 if (VecWidth == 128 && !IsSaturating)
4294 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4295 else if (VecWidth == 256 && !IsSaturating)
4296 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4297 else if (VecWidth == 512 && !IsSaturating)
4298 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4299 else if (VecWidth == 128 && IsSaturating)
4300 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4301 else if (VecWidth == 256 && IsSaturating)
4302 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4303 else if (VecWidth == 512 && IsSaturating)
4304 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4305 else
4306 llvm_unreachable("Unexpected intrinsic");
4307
4308 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4309 CI->getArgOperand(2)};
4310 Rep = Builder.CreateIntrinsic(IID, Args);
4311 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4312 : CI->getArgOperand(0);
4313 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4314 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4315 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4316 Name == "subborrow.u32" || Name == "subborrow.u64") {
4317 Intrinsic::ID IID;
4318 if (Name[0] == 'a' && Name.back() == '2')
4319 IID = Intrinsic::x86_addcarry_32;
4320 else if (Name[0] == 'a' && Name.back() == '4')
4321 IID = Intrinsic::x86_addcarry_64;
4322 else if (Name[0] == 's' && Name.back() == '2')
4323 IID = Intrinsic::x86_subborrow_32;
4324 else if (Name[0] == 's' && Name.back() == '4')
4325 IID = Intrinsic::x86_subborrow_64;
4326 else
4327 llvm_unreachable("Unexpected intrinsic");
4328
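// The replacement intrinsics return a { i8, i32/i64 } pair instead of
// writing the sum through the pointer operand, so store field 1 to the old
// pointer argument and use field 0 (the carry) as the call's result.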
4329 // Make a call with 3 operands.
4330 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4331 CI->getArgOperand(2)};
4332 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4333
4334 // Extract the second result and store it.
4335 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4336 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4337 // Replace the original call result with the first result of the new call.
4338 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4339
4340 CI->replaceAllUsesWith(CF);
4341 Rep = nullptr;
4342 } else if (Name.starts_with("avx512.mask.") &&
4343 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4344 // Rep will be updated by the call in the condition.
4345 }
4346
4347 return Rep;
4348}
4349
4350static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4351 Function *F, IRBuilder<> &Builder) {
4352 if (Name.starts_with("neon.bfcvt")) {
4353 if (Name.starts_with("neon.bfcvtn2")) {
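// bfcvtn2 keeps the low four bfloat lanes of operand 0 and writes the
// truncation of operand 1 into the high four lanes.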
4354 SmallVector<int, 32> LoMask(4);
4355 std::iota(LoMask.begin(), LoMask.end(), 0);
4356 SmallVector<int, 32> ConcatMask(8);
4357 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4358 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4359 Value *Trunc =
4360 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4361 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4362 } else if (Name.starts_with("neon.bfcvtn")) {
4363 SmallVector<int, 32> ConcatMask(8);
4364 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4365 Type *V4BF16 =
4366 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4367 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4369 return Builder.CreateShuffleVector(
4370 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4371 } else {
4372 return Builder.CreateFPTrunc(CI->getOperand(0),
4373 Type::getBFloatTy(F->getContext()));
4374 }
4375 } else if (Name.starts_with("sve.fcvt")) {
4376 Intrinsic::ID NewID =
4377 StringSwitch<Intrinsic::ID>(Name)
4378 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4379 .Case("sve.fcvtnt.bf16f32",
4380 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4381 .Default(Intrinsic::not_intrinsic);
4382 if (NewID == Intrinsic::not_intrinsic)
4383 llvm_unreachable("Unhandled Intrinsic!");
4384
4385 SmallVector<Value *, 3> Args(CI->args());
4386
4387 // The original intrinsics incorrectly used a predicate based on the
4388 // smallest element type rather than the largest.
4389 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4390 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4391
4392 if (Args[1]->getType() != BadPredTy)
4393 llvm_unreachable("Unexpected predicate type!");
4394
4395 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4396 BadPredTy, Args[1]);
4397 Args[1] = Builder.CreateIntrinsic(
4398 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4399
4400 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4401 CI->getName());
4402 }
4403
4404 llvm_unreachable("Unhandled Intrinsic!");
4405}
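// Editor's note: an illustrative sketch of the sve.fcvt predicate fix above,
// with hypothetical value names. For
//   %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(
//            <vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg,
//            <vscale x 4 x float> %op)
// the <vscale x 8 x i1> predicate is round-tripped through svbool to the
// <vscale x 4 x i1> type the ...bf16f32.v2 replacement expects.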
4406
4407 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4408 IRBuilder<> &Builder) {
4409 if (Name == "mve.vctp64.old") {
4410 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4411 // correct type.
4412 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4413 CI->getArgOperand(0),
4414 /*FMFSource=*/nullptr, CI->getName());
4415 Value *C1 = Builder.CreateIntrinsic(
4416 Intrinsic::arm_mve_pred_v2i,
4417 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4418 return Builder.CreateIntrinsic(
4419 Intrinsic::arm_mve_pred_i2v,
4420 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4421 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4422 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4423 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4424 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4425 Name ==
4426 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4427 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4428 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4429 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4430 Name ==
4431 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4432 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4433 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4434 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4435 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4436 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4437 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4438 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4439 std::vector<Type *> Tys;
4440 unsigned ID = CI->getIntrinsicID();
4441 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4442 switch (ID) {
4443 case Intrinsic::arm_mve_mull_int_predicated:
4444 case Intrinsic::arm_mve_vqdmull_predicated:
4445 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4446 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4447 break;
4448 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4449 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4450 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4451 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4452 V2I1Ty};
4453 break;
4454 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4455 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4456 CI->getOperand(1)->getType(), V2I1Ty};
4457 break;
4458 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4459 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4460 CI->getOperand(2)->getType(), V2I1Ty};
4461 break;
4462 case Intrinsic::arm_cde_vcx1q_predicated:
4463 case Intrinsic::arm_cde_vcx1qa_predicated:
4464 case Intrinsic::arm_cde_vcx2q_predicated:
4465 case Intrinsic::arm_cde_vcx2qa_predicated:
4466 case Intrinsic::arm_cde_vcx3q_predicated:
4467 case Intrinsic::arm_cde_vcx3qa_predicated:
4468 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4469 break;
4470 default:
4471 llvm_unreachable("Unhandled Intrinsic!");
4472 }
4473
4474 std::vector<Value *> Ops;
4475 for (Value *Op : CI->args()) {
4476 Type *Ty = Op->getType();
4477 if (Ty->getScalarSizeInBits() == 1) {
4478 Value *C1 = Builder.CreateIntrinsic(
4479 Intrinsic::arm_mve_pred_v2i,
4480 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4481 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4482 }
4483 Ops.push_back(Op);
4484 }
4485
4486 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4487 CI->getName());
4488 }
4489 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4490}
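// Editor's note: an illustrative sketch of the vctp64 upgrade above, with
// hypothetical value names. The old form
//   %p = call <4 x i1> @llvm.arm.mve.vctp64.old(i32 %n)
// becomes a v2i1 vctp whose result is round-tripped through the integer
// predicate form back to <4 x i1>:
//   %v = call <2 x i1> @llvm.arm.mve.vctp64(i32 %n)
//   %i = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> %v)
//   %p = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %i)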
4491
4492// These are expected to have the arguments:
4493// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4494//
4495// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4496//
4497 static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4498 Function *F, IRBuilder<> &Builder) {
4499 AtomicRMWInst::BinOp RMWOp =
4500 StringSwitch<AtomicRMWInst::BinOp>(Name)
4501 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4502 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4503 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4504 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4505 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4506 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4507 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4508 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4509 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4510 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4511 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);
4512
4513 unsigned NumOperands = CI->getNumOperands();
4514 if (NumOperands < 3) // Malformed bitcode.
4515 return nullptr;
4516
4517 Value *Ptr = CI->getArgOperand(0);
4518 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4519 if (!PtrTy) // Malformed.
4520 return nullptr;
4521
4522 Value *Val = CI->getArgOperand(1);
4523 if (Val->getType() != CI->getType()) // Malformed.
4524 return nullptr;
4525
4526 ConstantInt *OrderArg = nullptr;
4527 bool IsVolatile = false;
4528
4529 // These should have 5 arguments (plus the callee). A separate version of the
4530 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4531 if (NumOperands > 3)
4532 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4533
4534 // Ignore scope argument at 3
4535
4536 if (NumOperands > 5) {
4537 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4538 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4539 }
4540
4541 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4542 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4543 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4544 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4545 Order = AtomicOrdering::Monotonic;
4546
4547 LLVMContext &Ctx = F->getContext();
4548
4549 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4550 Type *RetTy = CI->getType();
4551 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4552 if (VT->getElementType()->isIntegerTy(16)) {
4553 VectorType *AsBF16 =
4554 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4555 Val = Builder.CreateBitCast(Val, AsBF16);
4556 }
4557 }
4558
4559 // The scope argument never really worked correctly. Use agent as the most
4560 // conservative option which should still always produce the instruction.
4561 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4562 AtomicRMWInst *RMW =
4563 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4564
4565 unsigned AddrSpace = PtrTy->getAddressSpace();
4566 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4567 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4568 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4569 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4570 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4571 }
4572
4573 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4574 MDBuilder MDB(F->getContext());
4575 MDNode *RangeNotPrivate =
4576 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4577 APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4578 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4579 }
4580
4581 if (IsVolatile)
4582 RMW->setVolatile(true);
4583
4584 return Builder.CreateBitCast(RMW, RetTy);
4585}
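// Editor's note: an illustrative sketch of the atomic upgrade above, with
// hypothetical value names. A legacy call such as
//   %r = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %p, float %v,
//                                            i32 0, i32 0, i1 false)
// becomes a plain IR atomic (ordering 0 maps to monotonic, scope to agent):
//   %r = atomicrmw fadd ptr addrspace(3) %p, float %v syncscope("agent") monotonic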
4586
4587/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4588/// plain MDNode, as it's the verifier's job to check these are the correct
4589/// types later.
4590static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4591 if (Op < CI->arg_size()) {
4592 if (MetadataAsValue *MAV =
4593 dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) {
4594 Metadata *MD = MAV->getMetadata();
4595 return dyn_cast_if_present<MDNode>(MD);
4596 }
4597 }
4598 return nullptr;
4599}
4600
4601 /// Helper to unwrap MetadataAsValue operands that wrap plain Metadata, such as the Value field.
4602static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4603 if (Op < CI->arg_size())
4604 if (auto *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4605 return MAV->getMetadata();
4606 return nullptr;
4607}
4608
4609 static MDNode *getDebugLocSafe(const Instruction *I) {
4610 // The MDNode attached to this instruction might not be the correct type,
4611 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4612 return I->getDebugLoc().getAsMDNode();
4613}
4614
4615/// Convert debug intrinsic calls to non-instruction debug records.
4616/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4617/// \p CI - The debug intrinsic call.
4618 static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4619 DbgRecord *DR = nullptr;
4620 if (Name == "label") {
4621 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
4622 CI->getDebugLoc());
4623 } else if (Name == "assign") {
4624 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4625 DbgVariableRecord::LocationType::Assign, unwrapMAVMetadataOp(CI, 0),
4626 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4627 unwrapMAVMetadataOp(CI, 4),
4628 /*The address is a Value ref, it will be stored as a Metadata */
4629 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4630 } else if (Name == "declare") {
4631 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4632 DbgVariableRecord::LocationType::Declare, unwrapMAVMetadataOp(CI, 0),
4633 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4634 getDebugLocSafe(CI));
4635 } else if (Name == "addr") {
4636 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4637 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4638 // Don't try to add something to the expression if it's not an expression.
4639 // Instead, allow the verifier to fail later.
4640 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4641 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4642 }
4643 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4644 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4645 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4646 getDebugLocSafe(CI));
4647 } else if (Name == "value") {
4648 // An old version of dbg.value had an extra offset argument.
4649 unsigned VarOp = 1;
4650 unsigned ExprOp = 2;
4651 if (CI->arg_size() == 4) {
4652 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4653 // Nonzero offset dbg.values get dropped without a replacement.
4654 if (!Offset || !Offset->isZeroValue())
4655 return;
4656 VarOp = 2;
4657 ExprOp = 3;
4658 }
4659 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4660 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4661 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4662 nullptr, getDebugLocSafe(CI));
4663 }
4664 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4665 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4666}
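// Editor's note: an illustrative sketch of the conversion above, with
// hypothetical metadata numbers. An intrinsic-style binding
//   call void @llvm.dbg.value(metadata i32 %x, metadata !10,
//                             metadata !DIExpression()), !dbg !20
// becomes the equivalent non-instruction record, printed as
//   #dbg_value(i32 %x, !10, !DIExpression(), !20)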
4667
4668/// Upgrade a call to an old intrinsic. All argument and return casting must be
4669/// provided to seamlessly integrate with existing context.
4670 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4671 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4672 // checks the callee's function type matches. It's likely we need to handle
4673 // type changes here.
4674 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4675 if (!F)
4676 return;
4677
4678 LLVMContext &C = CI->getContext();
4679 IRBuilder<> Builder(C);
4680 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4681
4682 if (!NewFn) {
4683 // Get the Function's name.
4684 StringRef Name = F->getName();
4685
4686 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4687 Name = Name.substr(5);
4688
4689 bool IsX86 = Name.consume_front("x86.");
4690 bool IsNVVM = Name.consume_front("nvvm.");
4691 bool IsAArch64 = Name.consume_front("aarch64.");
4692 bool IsARM = Name.consume_front("arm.");
4693 bool IsAMDGCN = Name.consume_front("amdgcn.");
4694 bool IsDbg = Name.consume_front("dbg.");
4695 Value *Rep = nullptr;
4696
4697 if (!IsX86 && Name == "stackprotectorcheck") {
4698 Rep = nullptr;
4699 } else if (IsNVVM) {
4700 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4701 } else if (IsX86) {
4702 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4703 } else if (IsAArch64) {
4704 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4705 } else if (IsARM) {
4706 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4707 } else if (IsAMDGCN) {
4708 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4709 } else if (IsDbg) {
4710 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4711 } else {
4712 llvm_unreachable("Unknown function for CallBase upgrade.");
4713 }
4714
4715 if (Rep)
4716 CI->replaceAllUsesWith(Rep);
4717 CI->eraseFromParent();
4718 return;
4719 }
4720
4721 const auto &DefaultCase = [&]() -> void {
4722 if (F == NewFn)
4723 return;
4724
4725 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4726 // Handle generic mangling change.
4727 assert(
4728 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4729 "Unknown function for CallBase upgrade and isn't just a name change");
4730 CI->setCalledFunction(NewFn);
4731 return;
4732 }
4733
4734 // This must be an upgrade from a named to a literal struct.
4735 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4736 assert(OldST != NewFn->getReturnType() &&
4737 "Return type must have changed");
4738 assert(OldST->getNumElements() ==
4739 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4740 "Must have same number of elements");
4741
4742 SmallVector<Value *> Args(CI->args());
4743 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4744 NewCI->setAttributes(CI->getAttributes());
4745 Value *Res = PoisonValue::get(OldST);
4746 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4747 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4748 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4749 }
4750 CI->replaceAllUsesWith(Res);
4751 CI->eraseFromParent();
4752 return;
4753 }
4754
4755 // We're probably about to produce something invalid. Let the verifier catch
4756 // it instead of dying here.
4757 CI->setCalledOperand(
4758 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4759 return;
4760 };
4761 CallInst *NewCall = nullptr;
4762 switch (NewFn->getIntrinsicID()) {
4763 default: {
4764 DefaultCase();
4765 return;
4766 }
4767 case Intrinsic::arm_neon_vst1:
4768 case Intrinsic::arm_neon_vst2:
4769 case Intrinsic::arm_neon_vst3:
4770 case Intrinsic::arm_neon_vst4:
4771 case Intrinsic::arm_neon_vst2lane:
4772 case Intrinsic::arm_neon_vst3lane:
4773 case Intrinsic::arm_neon_vst4lane: {
4774 SmallVector<Value *, 4> Args(CI->args());
4775 NewCall = Builder.CreateCall(NewFn, Args);
4776 break;
4777 }
4778 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4779 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4780 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4781 LLVMContext &Ctx = F->getParent()->getContext();
4782 SmallVector<Value *, 4> Args(CI->args());
4783 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4784 cast<ConstantInt>(Args[3])->getZExtValue());
4785 NewCall = Builder.CreateCall(NewFn, Args);
4786 break;
4787 }
4788 case Intrinsic::aarch64_sve_ld3_sret:
4789 case Intrinsic::aarch64_sve_ld4_sret:
4790 case Intrinsic::aarch64_sve_ld2_sret: {
4791 StringRef Name = F->getName();
4792 Name = Name.substr(5);
4793 unsigned N = StringSwitch<unsigned>(Name)
4794 .StartsWith("aarch64.sve.ld2", 2)
4795 .StartsWith("aarch64.sve.ld3", 3)
4796 .StartsWith("aarch64.sve.ld4", 4)
4797 .Default(0);
4798 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4799 unsigned MinElts = RetTy->getMinNumElements() / N;
4800 SmallVector<Value *, 2> Args(CI->args());
4801 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4802 Value *Ret = llvm::PoisonValue::get(RetTy);
4803 for (unsigned I = 0; I < N; I++) {
4804 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4805 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
4806 }
4807 NewCall = dyn_cast<CallInst>(Ret);
4808 break;
4809 }
4810
4811 case Intrinsic::coro_end: {
4812 SmallVector<Value *, 3> Args(CI->args());
4813 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4814 NewCall = Builder.CreateCall(NewFn, Args);
4815 break;
4816 }
4817
4818 case Intrinsic::vector_extract: {
4819 StringRef Name = F->getName();
4820 Name = Name.substr(5); // Strip llvm
4821 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4822 DefaultCase();
4823 return;
4824 }
4825 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4826 unsigned MinElts = RetTy->getMinNumElements();
4827 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4828 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4829 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4830 break;
4831 }
4832
4833 case Intrinsic::vector_insert: {
4834 StringRef Name = F->getName();
4835 Name = Name.substr(5);
4836 if (!Name.starts_with("aarch64.sve.tuple")) {
4837 DefaultCase();
4838 return;
4839 }
4840 if (Name.starts_with("aarch64.sve.tuple.set")) {
4841 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4842 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4843 Value *NewIdx =
4844 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4845 NewCall = Builder.CreateCall(
4846 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4847 break;
4848 }
4849 if (Name.starts_with("aarch64.sve.tuple.create")) {
4850 unsigned N = StringSwitch<unsigned>(Name)
4851 .StartsWith("aarch64.sve.tuple.create2", 2)
4852 .StartsWith("aarch64.sve.tuple.create3", 3)
4853 .StartsWith("aarch64.sve.tuple.create4", 4)
4854 .Default(0);
4855 assert(N > 1 && "Create is expected to be between 2-4");
4856 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4857 Value *Ret = llvm::PoisonValue::get(RetTy);
4858 unsigned MinElts = RetTy->getMinNumElements() / N;
4859 for (unsigned I = 0; I < N; I++) {
4860 Value *V = CI->getArgOperand(I);
4861 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
4862 }
4863 NewCall = dyn_cast<CallInst>(Ret);
4864 }
4865 break;
4866 }
4867
4868 case Intrinsic::arm_neon_bfdot:
4869 case Intrinsic::arm_neon_bfmmla:
4870 case Intrinsic::arm_neon_bfmlalb:
4871 case Intrinsic::arm_neon_bfmlalt:
4872 case Intrinsic::aarch64_neon_bfdot:
4873 case Intrinsic::aarch64_neon_bfmmla:
4874 case Intrinsic::aarch64_neon_bfmlalb:
4875 case Intrinsic::aarch64_neon_bfmlalt: {
4876 SmallVector<Value *, 3> Args;
4877 assert(CI->arg_size() == 3 &&
4878 "Mismatch between function args and call args");
4879 size_t OperandWidth =
4880 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4881 assert((OperandWidth == 64 || OperandWidth == 128) &&
4882 "Unexpected operand width");
4883 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4884 auto Iter = CI->args().begin();
4885 Args.push_back(*Iter++);
4886 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4887 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4888 NewCall = Builder.CreateCall(NewFn, Args);
4889 break;
4890 }
4891
4892 case Intrinsic::bitreverse:
4893 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4894 break;
4895
4896 case Intrinsic::ctlz:
4897 case Intrinsic::cttz:
4898 assert(CI->arg_size() == 1 &&
4899 "Mismatch between function args and call args");
4900 NewCall =
4901 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4902 break;
4903
4904 case Intrinsic::objectsize: {
4905 Value *NullIsUnknownSize =
4906 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4907 Value *Dynamic =
4908 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4909 NewCall = Builder.CreateCall(
4910 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4911 break;
4912 }
4913
4914 case Intrinsic::ctpop:
4915 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4916 break;
4917
4918 case Intrinsic::convert_from_fp16:
4919 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4920 break;
4921
4922 case Intrinsic::dbg_value: {
4923 StringRef Name = F->getName();
4924 Name = Name.substr(5); // Strip llvm.
4925 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4926 if (Name.starts_with("dbg.addr")) {
4927 DIExpression *Expr = cast<DIExpression>(
4928 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4929 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4930 NewCall =
4931 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4932 MetadataAsValue::get(C, Expr)});
4933 break;
4934 }
4935
4936 // Upgrade from the old version that had an extra offset argument.
4937 assert(CI->arg_size() == 4);
4938 // Drop nonzero offsets instead of attempting to upgrade them.
4939 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4940 if (Offset->isZeroValue()) {
4941 NewCall = Builder.CreateCall(
4942 NewFn,
4943 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4944 break;
4945 }
4946 CI->eraseFromParent();
4947 return;
4948 }
4949
4950 case Intrinsic::ptr_annotation:
4951 // Upgrade from versions that lacked the annotation attribute argument.
4952 if (CI->arg_size() != 4) {
4953 DefaultCase();
4954 return;
4955 }
4956
4957 // Create a new call with an added null annotation attribute argument.
4958 NewCall = Builder.CreateCall(
4959 NewFn,
4960 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4961 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
4962 NewCall->takeName(CI);
4963 CI->replaceAllUsesWith(NewCall);
4964 CI->eraseFromParent();
4965 return;
4966
4967 case Intrinsic::var_annotation:
4968 // Upgrade from versions that lacked the annotation attribute argument.
4969 if (CI->arg_size() != 4) {
4970 DefaultCase();
4971 return;
4972 }
4973 // Create a new call with an added null annotation attribute argument.
4974 NewCall = Builder.CreateCall(
4975 NewFn,
4976 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4977 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
4978 NewCall->takeName(CI);
4979 CI->replaceAllUsesWith(NewCall);
4980 CI->eraseFromParent();
4981 return;
4982
4983 case Intrinsic::riscv_aes32dsi:
4984 case Intrinsic::riscv_aes32dsmi:
4985 case Intrinsic::riscv_aes32esi:
4986 case Intrinsic::riscv_aes32esmi:
4987 case Intrinsic::riscv_sm4ks:
4988 case Intrinsic::riscv_sm4ed: {
4989 // The last argument to these intrinsics used to be i8 and changed to i32.
4990 // The type overload for sm4ks and sm4ed was removed.
4991 Value *Arg2 = CI->getArgOperand(2);
4992 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4993 return;
4994
4995 Value *Arg0 = CI->getArgOperand(0);
4996 Value *Arg1 = CI->getArgOperand(1);
4997 if (CI->getType()->isIntegerTy(64)) {
4998 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4999 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5000 }
5001
5002 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5003 cast<ConstantInt>(Arg2)->getZExtValue());
5004
5005 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5006 Value *Res = NewCall;
5007 if (Res->getType() != CI->getType())
5008 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5009 NewCall->takeName(CI);
5010 CI->replaceAllUsesWith(Res);
5011 CI->eraseFromParent();
5012 return;
5013 }
5014 case Intrinsic::nvvm_mapa_shared_cluster: {
5015 // Create a new call with the correct address space.
5016 NewCall =
5017 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5018 Value *Res = NewCall;
5019 Res = Builder.CreateAddrSpaceCast(
5020 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5021 NewCall->takeName(CI);
5022 CI->replaceAllUsesWith(Res);
5023 CI->eraseFromParent();
5024 return;
5025 }
5026 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5027 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5028 // Create a new call with the correct address space.
5029 SmallVector<Value *, 4> Args(CI->args());
5030 Args[0] = Builder.CreateAddrSpaceCast(
5031 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5032
5033 NewCall = Builder.CreateCall(NewFn, Args);
5034 NewCall->takeName(CI);
5035 CI->replaceAllUsesWith(NewCall);
5036 CI->eraseFromParent();
5037 return;
5038 }
5039 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5040 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5041 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5042 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5043 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5044 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5045 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5046 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5047 SmallVector<Value *, 16> Args(CI->args());
5048
5049 // Create AddrSpaceCast to shared_cluster if needed.
5050 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5051 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5052 if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
5053 Args[0] = Builder.CreateAddrSpaceCast(
5054 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5055
5056 // Attach the flag argument for cta_group, with a
5057 // default value of 0. This handles case (2) in
5058 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5059 size_t NumArgs = CI->arg_size();
5060 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5061 if (!FlagArg->getType()->isIntegerTy(1))
5062 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5063
5064 NewCall = Builder.CreateCall(NewFn, Args);
5065 NewCall->takeName(CI);
5066 CI->replaceAllUsesWith(NewCall);
5067 CI->eraseFromParent();
5068 return;
5069 }
5070 case Intrinsic::riscv_sha256sig0:
5071 case Intrinsic::riscv_sha256sig1:
5072 case Intrinsic::riscv_sha256sum0:
5073 case Intrinsic::riscv_sha256sum1:
5074 case Intrinsic::riscv_sm3p0:
5075 case Intrinsic::riscv_sm3p1: {
5076 // These intrinsics used to be overloaded on i32/i64; the overload was
5077 // removed, so i64 variants are truncated to i32 and the result sign-extended.
5078 if (!CI->getType()->isIntegerTy(64))
5079 return;
5080
5081 Value *Arg =
5082 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5083
5084 NewCall = Builder.CreateCall(NewFn, Arg);
5085 Value *Res =
5086 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5087 NewCall->takeName(CI);
5088 CI->replaceAllUsesWith(Res);
5089 CI->eraseFromParent();
5090 return;
5091 }
5092
5093 case Intrinsic::x86_xop_vfrcz_ss:
5094 case Intrinsic::x86_xop_vfrcz_sd:
5095 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5096 break;
5097
5098 case Intrinsic::x86_xop_vpermil2pd:
5099 case Intrinsic::x86_xop_vpermil2ps:
5100 case Intrinsic::x86_xop_vpermil2pd_256:
5101 case Intrinsic::x86_xop_vpermil2ps_256: {
5102 SmallVector<Value *, 4> Args(CI->args());
5103 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5104 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5105 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5106 NewCall = Builder.CreateCall(NewFn, Args);
5107 break;
5108 }
5109
5110 case Intrinsic::x86_sse41_ptestc:
5111 case Intrinsic::x86_sse41_ptestz:
5112 case Intrinsic::x86_sse41_ptestnzc: {
5113 // The arguments for these intrinsics used to be v4f32, and changed
5114 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5115 // So, the only thing required is a bitcast for both arguments.
5116 // First, check the arguments have the old type.
5117 Value *Arg0 = CI->getArgOperand(0);
5118 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5119 return;
5120
5121 // Old intrinsic, add bitcasts
5122 Value *Arg1 = CI->getArgOperand(1);
5123
5124 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5125
5126 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5127 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5128
5129 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5130 break;
5131 }
5132
5133 case Intrinsic::x86_rdtscp: {
5134 // This used to take 1 argument. If we have no arguments, it is already
5135 // upgraded.
5136 if (CI->getNumOperands() == 0)
5137 return;
5138
5139 NewCall = Builder.CreateCall(NewFn);
5140 // Extract the second result and store it.
5141 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5142 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5143 // Replace the original call result with the first result of the new call.
5144 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5145
5146 NewCall->takeName(CI);
5147 CI->replaceAllUsesWith(TSC);
5148 CI->eraseFromParent();
5149 return;
5150 }
5151
5152 case Intrinsic::x86_sse41_insertps:
5153 case Intrinsic::x86_sse41_dppd:
5154 case Intrinsic::x86_sse41_dpps:
5155 case Intrinsic::x86_sse41_mpsadbw:
5156 case Intrinsic::x86_avx_dp_ps_256:
5157 case Intrinsic::x86_avx2_mpsadbw: {
5158 // Need to truncate the last argument from i32 to i8 -- this argument models
5159 // an inherently 8-bit immediate operand to these x86 instructions.
5160 SmallVector<Value *, 4> Args(CI->args());
5161
5162 // Replace the last argument with a trunc.
5163 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5164 NewCall = Builder.CreateCall(NewFn, Args);
5165 break;
5166 }
5167
5168 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5169 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5170 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5171 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5172 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5173 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5174 SmallVector<Value *, 4> Args(CI->args());
5175 unsigned NumElts =
5176 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5177 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5178
5179 NewCall = Builder.CreateCall(NewFn, Args);
5180 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5181
5182 NewCall->takeName(CI);
5183 CI->replaceAllUsesWith(Res);
5184 CI->eraseFromParent();
5185 return;
5186 }
5187
5188 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5189 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5190 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5191 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5192 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5193 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5194 SmallVector<Value *, 4> Args(CI->args());
5195 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5196 if (NewFn->getIntrinsicID() ==
5197 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5198 Args[1] = Builder.CreateBitCast(
5199 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5200
5201 NewCall = Builder.CreateCall(NewFn, Args);
5202 Value *Res = Builder.CreateBitCast(
5203 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5204
5205 NewCall->takeName(CI);
5206 CI->replaceAllUsesWith(Res);
5207 CI->eraseFromParent();
5208 return;
5209 }
5210 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5211 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5212 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5213 SmallVector<Value *, 4> Args(CI->args());
5214 unsigned NumElts =
5215 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5216 Args[1] = Builder.CreateBitCast(
5217 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5218 Args[2] = Builder.CreateBitCast(
5219 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5220
5221 NewCall = Builder.CreateCall(NewFn, Args);
5222 break;
5223 }
5224
5225 case Intrinsic::thread_pointer: {
5226 NewCall = Builder.CreateCall(NewFn, {});
5227 break;
5228 }
5229
5230 case Intrinsic::memcpy:
5231 case Intrinsic::memmove:
5232 case Intrinsic::memset: {
5233 // We have to make sure that the call signature is what we're expecting.
5234 // We only want to change the old signatures by removing the alignment arg:
5235 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5236 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5237 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5238 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5239 // Note: i8*'s in the above can be any pointer type
5240 if (CI->arg_size() != 5) {
5241 DefaultCase();
5242 return;
5243 }
5244 // Remove alignment argument (3), and add alignment attributes to the
5245 // dest/src pointers.
5246 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5247 CI->getArgOperand(2), CI->getArgOperand(4)};
5248 NewCall = Builder.CreateCall(NewFn, Args);
5249 AttributeList OldAttrs = CI->getAttributes();
5250 AttributeList NewAttrs = AttributeList::get(
5251 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5252 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5253 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5254 NewCall->setAttributes(NewAttrs);
5255 auto *MemCI = cast<MemIntrinsic>(NewCall);
5256 // All mem intrinsics support dest alignment.
5257 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
5258 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5259 // Memcpy/Memmove also support source alignment.
5260 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5261 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5262 break;
5263 }
5264
5265 case Intrinsic::masked_load:
5266 case Intrinsic::masked_gather:
5267 case Intrinsic::masked_store:
5268 case Intrinsic::masked_scatter: {
5269 if (CI->arg_size() != 4) {
5270 DefaultCase();
5271 return;
5272 }
5273
5274 auto GetMaybeAlign = [](Value *Op) {
5275 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5276 uint64_t Val = CI->getZExtValue();
5277 if (Val == 0)
5278 return MaybeAlign();
5279 if (isPowerOf2_64(Val))
5280 return MaybeAlign(Val);
5281 }
5282 reportFatalUsageError("Invalid alignment argument");
5283 };
5284 auto GetAlign = [&](Value *Op) {
5285 MaybeAlign Align = GetMaybeAlign(Op);
5286 if (Align)
5287 return *Align;
5288 reportFatalUsageError("Invalid zero alignment argument");
5289 };
5290
5291 const DataLayout &DL = CI->getDataLayout();
5292 switch (NewFn->getIntrinsicID()) {
5293 case Intrinsic::masked_load:
5294 NewCall = Builder.CreateMaskedLoad(
5295 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5296 CI->getArgOperand(2), CI->getArgOperand(3));
5297 break;
5298 case Intrinsic::masked_gather:
5299 NewCall = Builder.CreateMaskedGather(
5300 CI->getType(), CI->getArgOperand(0),
5301 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5302 CI->getType()->getScalarType()),
5303 CI->getArgOperand(2), CI->getArgOperand(3));
5304 break;
5305 case Intrinsic::masked_store:
5306 NewCall = Builder.CreateMaskedStore(
5307 CI->getArgOperand(0), CI->getArgOperand(1),
5308 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5309 break;
5310 case Intrinsic::masked_scatter:
5311 NewCall = Builder.CreateMaskedScatter(
5312 CI->getArgOperand(0), CI->getArgOperand(1),
5313 DL.getValueOrABITypeAlignment(
5314 GetMaybeAlign(CI->getArgOperand(2)),
5315 CI->getArgOperand(0)->getType()->getScalarType()),
5316 CI->getArgOperand(3));
5317 break;
5318 default:
5319 llvm_unreachable("Unexpected intrinsic ID");
5320 }
5321 // Previous metadata is still valid.
5322 NewCall->copyMetadata(*CI);
5323 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5324 break;
5325 }
5326
5327 case Intrinsic::lifetime_start:
5328 case Intrinsic::lifetime_end: {
5329 if (CI->arg_size() != 2) {
5330 DefaultCase();
5331 return;
5332 }
5333
5334 Value *Ptr = CI->getArgOperand(1);
5335 // Try to strip pointer casts, such that the lifetime works on an alloca.
5336 Ptr = Ptr->stripPointerCasts();
5337 if (isa<AllocaInst>(Ptr)) {
5338 // Don't use NewFn, as we might have looked through an addrspacecast.
5339 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5340 NewCall = Builder.CreateLifetimeStart(Ptr);
5341 else
5342 NewCall = Builder.CreateLifetimeEnd(Ptr);
5343 break;
5344 }
5345
5346 // Otherwise remove the lifetime marker.
5347 CI->eraseFromParent();
5348 return;
5349 }
5350
5351 case Intrinsic::x86_avx512_vpdpbusd_128:
5352 case Intrinsic::x86_avx512_vpdpbusd_256:
5353 case Intrinsic::x86_avx512_vpdpbusd_512:
5354 case Intrinsic::x86_avx512_vpdpbusds_128:
5355 case Intrinsic::x86_avx512_vpdpbusds_256:
5356 case Intrinsic::x86_avx512_vpdpbusds_512:
5357 case Intrinsic::x86_avx2_vpdpbssd_128:
5358 case Intrinsic::x86_avx2_vpdpbssd_256:
5359 case Intrinsic::x86_avx10_vpdpbssd_512:
5360 case Intrinsic::x86_avx2_vpdpbssds_128:
5361 case Intrinsic::x86_avx2_vpdpbssds_256:
5362 case Intrinsic::x86_avx10_vpdpbssds_512:
5363 case Intrinsic::x86_avx2_vpdpbsud_128:
5364 case Intrinsic::x86_avx2_vpdpbsud_256:
5365 case Intrinsic::x86_avx10_vpdpbsud_512:
5366 case Intrinsic::x86_avx2_vpdpbsuds_128:
5367 case Intrinsic::x86_avx2_vpdpbsuds_256:
5368 case Intrinsic::x86_avx10_vpdpbsuds_512:
5369 case Intrinsic::x86_avx2_vpdpbuud_128:
5370 case Intrinsic::x86_avx2_vpdpbuud_256:
5371 case Intrinsic::x86_avx10_vpdpbuud_512:
5372 case Intrinsic::x86_avx2_vpdpbuuds_128:
5373 case Intrinsic::x86_avx2_vpdpbuuds_256:
5374 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5375 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5376 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5377 CI->getArgOperand(2)};
5378 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5379 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5380 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5381
5382 NewCall = Builder.CreateCall(NewFn, Args);
5383 break;
5384 }
5385 }
5386 assert(NewCall && "Should have either set this variable or returned through "
5387 "the default case");
5388 NewCall->takeName(CI);
5389 CI->replaceAllUsesWith(NewCall);
5390 CI->eraseFromParent();
5391}
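// Editor's note: an illustrative sketch of the memcpy/memmove/memset upgrade
// handled above, with hypothetical value names. The old five-operand form
//   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i32 8, i1 false)
// drops the alignment operand and carries it as parameter alignment instead:
//   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %d, ptr align 8 %s, i64 %n,
//                                    i1 false)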
5392
5393 void llvm::UpgradeCallsToIntrinsic(Function *F) {
5394 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5395
5396 // Check if this function should be upgraded and get the replacement function
5397 // if there is one.
5398 Function *NewFn;
5399 if (UpgradeIntrinsicFunction(F, NewFn)) {
5400 // Replace all users of the old function with the new function or new
5401 // instructions. This is not a range loop because the call is deleted.
5402 for (User *U : make_early_inc_range(F->users()))
5403 if (CallBase *CB = dyn_cast<CallBase>(U))
5404 UpgradeIntrinsicCall(CB, NewFn);
5405
5406 // Remove old function, no longer used, from the module.
5407 if (F != NewFn)
5408 F->eraseFromParent();
5409 }
5410}
5411
5412 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5413 const unsigned NumOperands = MD.getNumOperands();
5414 if (NumOperands == 0)
5415 return &MD; // Invalid, punt to a verifier error.
5416
5417 // Check if the tag uses struct-path aware TBAA format.
5418 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5419 return &MD;
5420
5421 auto &Context = MD.getContext();
5422 if (NumOperands == 3) {
5423 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5424 MDNode *ScalarType = MDNode::get(Context, Elts);
5425 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5426 Metadata *Elts2[] = {ScalarType, ScalarType,
5427 ConstantAsMetadata::get(
5428 Constant::getNullValue(Type::getInt64Ty(Context))),
5429 MD.getOperand(2)};
5430 return MDNode::get(Context, Elts2);
5431 }
5432 // Create a MDNode <MD, MD, offset 0>
5433 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
5434 Type::getInt64Ty(Context)))};
5435 return MDNode::get(Context, Elts);
5436}
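// Editor's note: an illustrative sketch of the TBAA upgrade above, with
// hypothetical node numbers. A scalar tag in the old format, e.g.
//   !1 = !{!"int", !0}
// is rewrapped into the struct-path form with itself as both base and access
// type at offset zero:
//   !2 = !{!1, !1, i64 0}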
5437
5438 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5439 Instruction *&Temp) {
5440 if (Opc != Instruction::BitCast)
5441 return nullptr;
5442
5443 Temp = nullptr;
5444 Type *SrcTy = V->getType();
5445 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5446 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5447 LLVMContext &Context = V->getContext();
5448
5449 // We have no information about target data layout, so we assume that
5450 // the maximum pointer size is 64bit.
5451 Type *MidTy = Type::getInt64Ty(Context);
5452 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5453
5454 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5455 }
5456
5457 return nullptr;
5458}
5459
5460 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5461 if (Opc != Instruction::BitCast)
5462 return nullptr;
5463
5464 Type *SrcTy = C->getType();
5465 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5466 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5467 LLVMContext &Context = C->getContext();
5468
5469 // We have no information about target data layout, so we assume that
5470 // the maximum pointer size is 64bit.
5471 Type *MidTy = Type::getInt64Ty(Context);
5472
5473 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
5474 DestTy);
5475 }
5476
5477 return nullptr;
5478}
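// Editor's note: an illustrative sketch for both helpers above, with
// hypothetical value names. A bitcast between pointers in different address
// spaces, e.g.
//   %q = bitcast ptr addrspace(1) %p to ptr addrspace(3)
// is no longer valid IR, so it is split into
//   %i = ptrtoint ptr addrspace(1) %p to i64
//   %q = inttoptr i64 %i to ptr addrspace(3)
// with i64 assumed as a conservative maximum pointer width.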
5479
5480/// Check the debug info version number, if it is out-dated, drop the debug
5481/// info. Return true if module is modified.
5482 bool llvm::UpgradeDebugInfo(Module &M) {
5483 if (DisableAutoUpgradeDebugInfo)
5484 return false;
5485
5486 llvm::TimeTraceScope timeScope("Upgrade debug info");
5487 // We need to get metadata before the module is verified (i.e., getModuleFlag
5488 // makes assumptions that we haven't verified yet). Carefully extract the flag
5489 // from the metadata.
5490 unsigned Version = 0;
5491 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5492 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5493 if (Flag->getNumOperands() < 3)
5494 return false;
5495 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5496 return K->getString() == "Debug Info Version";
5497 return false;
5498 });
5499 if (OpIt != ModFlags->op_end()) {
5500 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5501 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5502 Version = CI->getZExtValue();
5503 }
5504 }
5505
5506 if (Version == DEBUG_METADATA_VERSION) {
5507 bool BrokenDebugInfo = false;
5508 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5509 report_fatal_error("Broken module found, compilation aborted!");
5510 if (!BrokenDebugInfo)
5511 // Everything is ok.
5512 return false;
5513 else {
5514 // Diagnose malformed debug info.
5515 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5516 M.getContext().diagnose(Diag);
5517 }
5518 }
5519 bool Modified = StripDebugInfo(M);
5520 if (Modified && Version != DEBUG_METADATA_VERSION) {
5521 // Diagnose a version mismatch.
5522 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5523 M.getContext().diagnose(DiagVersion);
5524 }
5525 return Modified;
5526}
5527
5528static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5529 GlobalValue *GV, const Metadata *V) {
5530 Function *F = cast<Function>(GV);
5531
5532 constexpr StringLiteral DefaultValue = "1";
5533 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5534 unsigned Length = 0;
5535
5536 if (F->hasFnAttribute(Attr)) {
5537 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5538 // parse these elements placing them into Vect3
5539 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5540 for (; Length < 3 && !S.empty(); Length++) {
5541 auto [Part, Rest] = S.split(',');
5542 Vect3[Length] = Part.trim();
5543 S = Rest;
5544 }
5545 }
5546
5547 const unsigned Dim = DimC - 'x';
5548 assert(Dim < 3 && "Unexpected dim char");
5549
5550 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5551
5552 // Local variable required for the StringRef in Vect3 to point to.
5553 const std::string VStr = llvm::utostr(VInt);
5554 Vect3[Dim] = VStr;
5555 Length = std::max(Length, Dim + 1);
5556
5557 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5558 F->addFnAttr(Attr, NewAttr);
5559}
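// Editor's note: an illustrative sketch of the merge above, with hypothetical
// values. If F already carries "nvvm.maxntid"="16,8" and the metadata being
// upgraded is maxntidz = 4, the string is parsed into {"16","8"}, the z slot
// is overwritten, and the attribute is re-emitted as "nvvm.maxntid"="16,8,4".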
5560
5561static inline bool isXYZ(StringRef S) {
5562 return S == "x" || S == "y" || S == "z";
5563}
5564
5565 static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5566 const Metadata *V) {
5567 if (K == "kernel") {
5568 if (!mdconst::extract<ConstantInt>(V)->isZero())
5569 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5570 return true;
5571 }
5572 if (K == "align") {
5573 // V is a bitfield specifying two 16-bit values. The alignment value is
5574 // specified in the low 16 bits; the index is specified in the high bits. For the
5575 // index, 0 indicates the return value while higher values correspond to
5576 // each parameter (idx = param + 1).
5577 const uint64_t AlignIdxValuePair =
5578 mdconst::extract<ConstantInt>(V)->getZExtValue();
5579 const unsigned Idx = (AlignIdxValuePair >> 16);
5580 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5581 cast<Function>(GV)->addAttributeAtIndex(
5582 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5583 return true;
5584 }
5585 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5586 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5587 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5588 return true;
5589 }
5590 if (K == "minctasm") {
5591 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5592 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5593 return true;
5594 }
5595 if (K == "maxnreg") {
5596 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5597 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5598 return true;
5599 }
5600 if (K.consume_front("maxntid") && isXYZ(K)) {
5601 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5602 return true;
5603 }
5604 if (K.consume_front("reqntid") && isXYZ(K)) {
5605 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5606 return true;
5607 }
5608 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5609 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5610 return true;
5611 }
5612 if (K == "grid_constant") {
5613 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5614 for (const auto &Op : cast<MDNode>(V)->operands()) {
5615 // For some reason, the index is 1-based in the metadata. Good thing we're
5616 // able to auto-upgrade it!
5617 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5618 cast<Function>(GV)->addParamAttr(Index, Attr);
5619 }
5620 return true;
5621 }
5622
5623 return false;
5624}
5625
5626 void llvm::UpgradeNVVMAnnotations(Module &M) {
5627 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5628 if (!NamedMD)
5629 return;
5630
5631 SmallVector<MDNode *, 8> NewNodes;
5632 SmallPtrSet<const MDNode *, 8> SeenNodes;
5633 for (MDNode *MD : NamedMD->operands()) {
5634 if (!SeenNodes.insert(MD).second)
5635 continue;
5636
5637 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5638 if (!GV)
5639 continue;
5640
5641 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5642
5643 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5644 // Each nvvm.annotations metadata entry will be of the following form:
5645 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5646 // start index = 1, to skip the global variable key
5647 // increment = 2, to skip the value for each property-value pairs
5648 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5649 MDString *K = cast<MDString>(MD->getOperand(j));
5650 const MDOperand &V = MD->getOperand(j + 1);
5651 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5652 if (!Upgraded)
5653 NewOperands.append({K, V});
5654 }
5655
5656 if (NewOperands.size() > 1)
5657 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5658 }
5659
5660 NamedMD->clearOperands();
5661 for (MDNode *N : NewNodes)
5662 NamedMD->addOperand(N);
5663}
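// Editor's note: an illustrative sketch of the walk above, with hypothetical
// names. An annotation entry such as
//   !nvvm.annotations = !{!0}
//   !0 = !{ptr @kern, !"kernel", i32 1, !"maxntidx", i32 64}
// leaves @kern with the ptx_kernel calling convention and the attribute
// "nvvm.maxntid"="64"; since every pair was consumed, the node is dropped
// from the named metadata.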
5664
5665 /// Check for the objc retain/release marker, which should be upgraded.
5666 /// Returns true if the module is modified.
5667 static bool upgradeRetainReleaseMarker(Module &M) {
5668 bool Changed = false;
5669 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5670 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5671 if (ModRetainReleaseMarker) {
5672 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5673 if (Op) {
5674 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5675 if (ID) {
5676 SmallVector<StringRef, 4> ValueComp;
5677 ID->getString().split(ValueComp, "#");
5678 if (ValueComp.size() == 2) {
5679 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5680 ID = MDString::get(M.getContext(), NewValue);
5681 }
5682 M.addModuleFlag(Module::Error, MarkerKey, ID);
5683 M.eraseNamedMetadata(ModRetainReleaseMarker);
5684 Changed = true;
5685 }
5686 }
5687 }
5688 return Changed;
5689}
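// Editor's note: an illustrative sketch of the marker upgrade above. A marker
// string of the schematic form "asm-snippet#comment" is rewritten to
// "asm-snippet;comment" and moved from named metadata into the
// "clang.arc.retainAutoreleasedReturnValueMarker" module flag.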
5690
5691 void llvm::UpgradeARCRuntime(Module &M) {
5692 // This lambda converts normal function calls to ARC runtime functions to
5693 // intrinsic calls.
5694 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5695 llvm::Intrinsic::ID IntrinsicFunc) {
5696 Function *Fn = M.getFunction(OldFunc);
5697
5698 if (!Fn)
5699 return;
5700
5701 Function *NewFn =
5702 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5703
5704 for (User *U : make_early_inc_range(Fn->users())) {
5705 CallInst *CI = dyn_cast<CallInst>(U);
5706 if (!CI || CI->getCalledFunction() != Fn)
5707 continue;
5708
5709 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5710 FunctionType *NewFuncTy = NewFn->getFunctionType();
5711 SmallVector<Value *, 4> Args;
5712
5713 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5714 // value to the return type of the old function.
5715 if (NewFuncTy->getReturnType() != CI->getType() &&
5716 !CastInst::castIsValid(Instruction::BitCast, CI,
5717 NewFuncTy->getReturnType()))
5718 continue;
5719
5720 bool InvalidCast = false;
5721
5722 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5723 Value *Arg = CI->getArgOperand(I);
5724
5725 // Bitcast argument to the parameter type of the new function if it's
5726 // not a variadic argument.
5727 if (I < NewFuncTy->getNumParams()) {
5728 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5729 // to the parameter type of the new function.
5730 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5731 NewFuncTy->getParamType(I))) {
5732 InvalidCast = true;
5733 break;
5734 }
5735 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5736 }
5737 Args.push_back(Arg);
5738 }
5739
5740 if (InvalidCast)
5741 continue;
5742
5743 // Create a call instruction that calls the new function.
5744 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5745 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5746 NewCall->takeName(CI);
5747
5748 // Bitcast the return value back to the type of the old call.
5749 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5750
5751 if (!CI->use_empty())
5752 CI->replaceAllUsesWith(NewRetVal);
5753 CI->eraseFromParent();
5754 }
5755
5756 if (Fn->use_empty())
5757 Fn->eraseFromParent();
5758 };
5759
5760 // Unconditionally convert a call to "clang.arc.use" to a call to
5761 // "llvm.objc.clang.arc.use".
5762 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5763
5764 // Upgrade the retain release marker. If there is no need to upgrade
5765 // the marker, that means either the module is already new enough to contain
5766 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
5767 if (!upgradeRetainReleaseMarker(M))
5768 return;
5769
5770 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5771 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5772 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5773 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5774 {"objc_autoreleaseReturnValue",
5775 llvm::Intrinsic::objc_autoreleaseReturnValue},
5776 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5777 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5778 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5779 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5780 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5781 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5782 {"objc_release", llvm::Intrinsic::objc_release},
5783 {"objc_retain", llvm::Intrinsic::objc_retain},
5784 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5785 {"objc_retainAutoreleaseReturnValue",
5786 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5787 {"objc_retainAutoreleasedReturnValue",
5788 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5789 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5790 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5791 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5792 {"objc_unsafeClaimAutoreleasedReturnValue",
5793 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5794 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5795 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5796 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5797 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5798 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5799 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5800 {"objc_arc_annotation_topdown_bbstart",
5801 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5802 {"objc_arc_annotation_topdown_bbend",
5803 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5804 {"objc_arc_annotation_bottomup_bbstart",
5805 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5806 {"objc_arc_annotation_bottomup_bbend",
5807 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5808
5809 for (auto &I : RuntimeFuncs)
5810 UpgradeToIntrinsic(I.first, I.second);
5811}
5812
5813 bool llvm::UpgradeModuleFlags(Module &M) {
5814 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5815 if (!ModFlags)
5816 return false;
5817
5818 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5819 bool HasSwiftVersionFlag = false;
5820 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5821 uint32_t SwiftABIVersion;
5822 auto Int8Ty = Type::getInt8Ty(M.getContext());
5823 auto Int32Ty = Type::getInt32Ty(M.getContext());
5824
5825 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5826 MDNode *Op = ModFlags->getOperand(I);
5827 if (Op->getNumOperands() != 3)
5828 continue;
5829 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5830 if (!ID)
5831 continue;
5832 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5833 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5834 Type::getInt32Ty(M.getContext()), B)),
5835 MDString::get(M.getContext(), ID->getString()),
5836 Op->getOperand(2)};
5837 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5838 Changed = true;
5839 };
5840
5841 if (ID->getString() == "Objective-C Image Info Version")
5842 HasObjCFlag = true;
5843 if (ID->getString() == "Objective-C Class Properties")
5844 HasClassProperties = true;
5845 // Upgrade PIC from Error/Max to Min.
5846 if (ID->getString() == "PIC Level") {
5847 if (auto *Behavior =
5848 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5849 uint64_t V = Behavior->getLimitedValue();
5850 if (V == Module::Error || V == Module::Max)
5851 SetBehavior(Module::Min);
5852 }
5853 }
5854 // Upgrade "PIE Level" from Error to Max.
5855 if (ID->getString() == "PIE Level")
5856 if (auto *Behavior =
5857 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5858 if (Behavior->getLimitedValue() == Module::Error)
5859 SetBehavior(Module::Max);
5860
5861 // Upgrade branch protection and return address signing module flags. The
5862 // module flag behavior for these fields was Error and is now Min.
5863 if (ID->getString() == "branch-target-enforcement" ||
5864 ID->getString().starts_with("sign-return-address")) {
5866 if (auto *Behavior = mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5867 if (Behavior->getLimitedValue() == Module::Error) {
5868 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5869 Metadata *Ops[3] = {
5870 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5871 Op->getOperand(1), Op->getOperand(2)};
5872 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5873 Changed = true;
5874 }
5875 }
5876 }
5877
5878 // Upgrade the Objective-C Image Info Section: remove the whitespace in the
5879 // section name so that llvm-lto will not complain about mismatching
5880 // module flags that are functionally the same.
5881 if (ID->getString() == "Objective-C Image Info Section") {
5882 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5883 SmallVector<StringRef, 4> ValueComp;
5884 Value->getString().split(ValueComp, " ");
5885 if (ValueComp.size() != 1) {
5886 std::string NewValue;
5887 for (auto &S : ValueComp)
5888 NewValue += S.str();
5889 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5890 MDString::get(M.getContext(), NewValue)};
5891 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5892 Changed = true;
5893 }
5894 }
5895 }
5896
5897 // IRUpgrader turns an i32 "Objective-C Garbage Collection" flag into an i8
5898 // value. If the higher bits are set, it adds a new module flag with Swift info.
5899 if (ID->getString() == "Objective-C Garbage Collection") {
5900 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5901 if (Md) {
5902 assert(Md->getValue() && "Expected non-empty metadata");
5903 auto Type = Md->getValue()->getType();
5904 if (Type == Int8Ty)
5905 continue;
5906 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5907 if ((Val & 0xff) != Val) {
5908 HasSwiftVersionFlag = true;
5909 SwiftABIVersion = (Val & 0xff00) >> 8;
5910 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5911 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5912 }
5913 Metadata *Ops[3] = {
5914 Op->getOperand(0),
5915 Op->getOperand(1),
5916 ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
5917 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5918 Changed = true;
5919 }
5920 }
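// A sketch of the decoding above for a hypothetical packed value
// 0x01020300 (made up for illustration):
//   SwiftMajorVersion = 0x01 (bits 31..24)
//   SwiftMinorVersion = 0x02 (bits 23..16)
//   SwiftABIVersion   = 0x03 (bits 15..8)
//   i8 GC flag value  = 0x00 (bits 7..0)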
5921
5922 if (ID->getString() == "amdgpu_code_object_version") {
5923 Metadata *Ops[3] = {
5924 Op->getOperand(0),
5925 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5926 Op->getOperand(2)};
5927 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5928 Changed = true;
5929 }
5930 }
5931
5932 // "Objective-C Class Properties" is recently added for Objective-C. We
5933 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5934 // flag of value 0, so we can correclty downgrade this flag when trying to
5935 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5936 // this module flag.
5937 if (HasObjCFlag && !HasClassProperties) {
5938 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5939 (uint32_t)0);
5940 Changed = true;
5941 }
5942
5943 if (HasSwiftVersionFlag) {
5944 M.addModuleFlag(Module::Error, "Swift ABI Version",
5945 SwiftABIVersion);
5946 M.addModuleFlag(Module::Error, "Swift Major Version",
5947 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5948 M.addModuleFlag(Module::Error, "Swift Minor Version",
5949 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5950 Changed = true;
5951 }
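// Continuing the hypothetical packed value above, the module would gain:
//   !{i32 1, !"Swift ABI Version", i32 3}
//   !{i32 1, !"Swift Major Version", i8 1}
//   !{i32 1, !"Swift Minor Version", i8 2}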
5952
5953 return Changed;
5954}
5955
5956 void llvm::UpgradeSectionAttributes(Module &M) {
5957 auto TrimSpaces = [](StringRef Section) -> std::string {
5958 SmallVector<StringRef, 5> Components;
5959 Section.split(Components, ',');
5960
5961 SmallString<32> Buffer;
5962 raw_svector_ostream OS(Buffer);
5963
5964 for (auto Component : Components)
5965 OS << ',' << Component.trim();
5966
5967 return std::string(OS.str().substr(1));
5968 };
5969
5970 for (auto &GV : M.globals()) {
5971 if (!GV.hasSection())
5972 continue;
5973
5974 StringRef Section = GV.getSection();
5975
5976 if (!Section.starts_with("__DATA, __objc_catlist"))
5977 continue;
5978
5979 // __DATA, __objc_catlist, regular, no_dead_strip
5980 // __DATA,__objc_catlist,regular,no_dead_strip
5981 GV.setSection(TrimSpaces(Section));
5982 }
5983}
5984
5985namespace {
5986// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5987// callsites within a function that did not also have the strictfp attribute.
5988// Since 10.0, if strict FP semantics are needed within a function, the
5989// function must have the strictfp attribute and all calls within the function
5990// must also have the strictfp attribute. This latter restriction is
5991// necessary to prevent unwanted libcall simplification when a function is
5992// being cloned (such as for inlining).
5993//
5994// The "dangling" strictfp attribute usage was only used to prevent constant
5995// folding and other libcall simplification. The nobuiltin attribute on the
5996// callsite has the same effect.
5997struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5998 StrictFPUpgradeVisitor() = default;
5999
6000 void visitCallBase(CallBase &Call) {
6001 if (!Call.isStrictFP())
6002 return;
6003 if (isa<ConstrainedFPIntrinsic>(&Call))
6004 return;
6005 // If we get here, the caller doesn't have the strictfp attribute
6006 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6007 Call.removeFnAttr(Attribute::StrictFP);
6008 Call.addFnAttr(Attribute::NoBuiltin);
6009 }
6010};
6011
6012/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6013struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6014 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6015 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6016
6017 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6018 if (!RMW.isFloatingPointOperation())
6019 return;
6020
6021 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6022 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6023 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6024 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6025 }
6026};
6027} // namespace
6028
6029 void llvm::UpgradeFunctionAttributes(Function &F) {
6030 // If a function definition doesn't have the strictfp attribute,
6031 // convert any callsite strictfp attributes to nobuiltin.
6032 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6033 StrictFPUpgradeVisitor SFPV;
6034 SFPV.visit(F);
6035 }
6036
6037 // Remove all incompatible attributes from the function.
6038 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6039 F.getReturnType(), F.getAttributes().getRetAttrs()));
6040 for (auto &Arg : F.args())
6041 Arg.removeAttrs(
6042 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6043
6044 // Older versions of LLVM treated an "implicit-section-name" attribute
6045 // similarly to directly setting the section on a Function.
6046 if (Attribute A = F.getFnAttribute("implicit-section-name");
6047 A.isValid() && A.isStringAttribute()) {
6048 F.setSection(A.getValueAsString());
6049 F.removeFnAttr("implicit-section-name");
6050 }
6051
6052 if (!F.empty()) {
6053 // For some reason this is called twice, and the first time is before any
6054 // instructions are loaded into the body.
6055
6056 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6057 A.isValid()) {
6058
6059 if (A.getValueAsBool()) {
6060 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6061 Visitor.visit(F);
6062 }
6063
6064 // We will leave behind dead attribute uses on external declarations, but
6065 // clang never added these to declarations anyway.
6066 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
6067 }
6068 }
6069}
6070
6071 // Add the function attribute if it is not already present.
6072 static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6073 StringRef Value) {
6074 if (!F.hasFnAttribute(FnAttrName))
6075 F.addFnAttr(FnAttrName, Value);
6076}
6077
6078 // Add the function attribute if it is not already present.
6079 // If the attribute's value is "false", remove the attribute.
6080 // If its value is "true", reset it to a valueless attribute.
6081static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6082 if (!F.hasFnAttribute(FnAttrName)) {
6083 if (Set)
6084 F.addFnAttr(FnAttrName);
6085 } else {
6086 auto A = F.getFnAttribute(FnAttrName);
6087 if ("false" == A.getValueAsString())
6088 F.removeFnAttr(FnAttrName);
6089 else if ("true" == A.getValueAsString()) {
6090 F.removeFnAttr(FnAttrName);
6091 F.addFnAttr(FnAttrName);
6092 }
6093 }
6094}
6095
6096 void llvm::copyModuleAttrToFunctions(Module &M) {
6097 Triple T(M.getTargetTriple());
6098 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6099 return;
6100
6101 uint64_t BTEValue = 0;
6102 uint64_t BPPLRValue = 0;
6103 uint64_t GCSValue = 0;
6104 uint64_t SRAValue = 0;
6105 uint64_t SRAALLValue = 0;
6106 uint64_t SRABKeyValue = 0;
6107
6108 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6109 if (ModFlags) {
6110 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6111 MDNode *Op = ModFlags->getOperand(I);
6112 if (Op->getNumOperands() != 3)
6113 continue;
6114
6115 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6116 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6117 if (!ID || !CI)
6118 continue;
6119
6120 StringRef IDStr = ID->getString();
6121 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6122 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6123 : IDStr == "guarded-control-stack" ? &GCSValue
6124 : IDStr == "sign-return-address" ? &SRAValue
6125 : IDStr == "sign-return-address-all" ? &SRAALLValue
6126 : IDStr == "sign-return-address-with-bkey"
6127 ? &SRABKeyValue
6128 : nullptr;
6129 if (!ValPtr)
6130 continue;
6131
6132 *ValPtr = CI->getZExtValue();
6133 if (*ValPtr == 2)
6134 return;
6135 }
6136 }
6137
6138 bool BTE = BTEValue == 1;
6139 bool BPPLR = BPPLRValue == 1;
6140 bool GCS = GCSValue == 1;
6141 bool SRA = SRAValue == 1;
6142
6143 StringRef SignTypeValue = "non-leaf";
6144 if (SRA && SRAALLValue == 1)
6145 SignTypeValue = "all";
6146
6147 StringRef SignKeyValue = "a_key";
6148 if (SRA && SRABKeyValue == 1)
6149 SignKeyValue = "b_key";
6150
6151 for (Function &F : M.getFunctionList()) {
6152 if (F.isDeclaration())
6153 continue;
6154
6155 if (SRA) {
6156 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6157 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6158 } else {
6159 if (auto A = F.getFnAttribute("sign-return-address");
6160 A.isValid() && "none" == A.getValueAsString()) {
6161 F.removeFnAttr("sign-return-address");
6162 F.removeFnAttr("sign-return-address-key");
6163 }
6164 }
6165 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6166 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6167 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6168 }
6169
6170 if (BTE)
6171 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6172 if (BPPLR)
6173 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6174 if (GCS)
6175 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6176 if (SRA) {
6177 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6178 if (SRAALLValue == 1)
6179 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6180 if (SRABKeyValue == 1)
6181 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6182 }
6183}
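// Sketch of the copy performed above: module flags such as
//   !{i32 8, !"sign-return-address", i32 1}
//   !{i32 8, !"branch-target-enforcement", i32 1}
// give every defined function
//   "sign-return-address"="non-leaf" "sign-return-address-key"="a_key"
//   "branch-target-enforcement"
// and each flag that was set is then reset to the value 2, which marks the
// module as already converted (see the early return above).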
6184
6185static bool isOldLoopArgument(Metadata *MD) {
6186 auto *T = dyn_cast_or_null<MDTuple>(MD);
6187 if (!T)
6188 return false;
6189 if (T->getNumOperands() < 1)
6190 return false;
6191 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6192 if (!S)
6193 return false;
6194 return S->getString().starts_with("llvm.vectorizer.");
6195}
6196
6197 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6198 StringRef OldPrefix = "llvm.vectorizer.";
6199 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6200
6201 if (OldTag == "llvm.vectorizer.unroll")
6202 return MDString::get(C, "llvm.loop.interleave.count");
6203
6204 return MDString::get(
6205 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6206 .str());
6207}
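// For example:
//   "llvm.vectorizer.width"  -> "llvm.loop.vectorize.width"
//   "llvm.vectorizer.enable" -> "llvm.loop.vectorize.enable"
//   "llvm.vectorizer.unroll" -> "llvm.loop.interleave.count"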
6208
6209 static Metadata *upgradeLoopArgument(Metadata *MD) {
6210 auto *T = dyn_cast_or_null<MDTuple>(MD);
6211 if (!T)
6212 return MD;
6213 if (T->getNumOperands() < 1)
6214 return MD;
6215 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6216 if (!OldTag)
6217 return MD;
6218 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6219 return MD;
6220
6221 // This has an old tag. Upgrade it.
6222 SmallVector<Metadata *, 4> Ops;
6223 Ops.reserve(T->getNumOperands());
6224 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6225 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6226 Ops.push_back(T->getOperand(I));
6227
6228 return MDTuple::get(T->getContext(), Ops);
6229}
6230
6231 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6232 auto *T = dyn_cast<MDTuple>(&N);
6233 if (!T)
6234 return &N;
6235
6236 if (none_of(T->operands(), isOldLoopArgument))
6237 return &N;
6238
6239 SmallVector<Metadata *, 4> Ops;
6240 Ops.reserve(T->getNumOperands());
6241 for (Metadata *MD : T->operands())
6242 Ops.push_back(upgradeLoopArgument(MD));
6243
6244 return MDTuple::get(T->getContext(), Ops);
6245}
6246
6247 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
6248 Triple T(TT);
6249 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6250 // the address space of globals to 1. This does not apply to SPIRV Logical.
6251 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6252 !DL.contains("-G") && !DL.starts_with("G")) {
6253 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6254 }
6255
6256 if (T.isLoongArch64() || T.isRISCV64()) {
6257 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6258 auto I = DL.find("-n64-");
6259 if (I != StringRef::npos)
6260 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6261 return DL.str();
6262 }
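// E.g. a (hypothetical) "e-m:e-p:64:64-i64:64-n64-S128" becomes
//      "e-m:e-p:64:64-i64:64-n32:64-S128".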
6263
6264 // AMDGPU data layout upgrades.
6265 std::string Res = DL.str();
6266 if (T.isAMDGPU()) {
6267 // Define address spaces for constants.
6268 if (!DL.contains("-G") && !DL.starts_with("G"))
6269 Res.append(Res.empty() ? "G1" : "-G1");
6270
6271 // AMDGCN data layout upgrades.
6272 if (T.isAMDGCN()) {
6273
6274 // Add missing non-integral declarations.
6275 // This goes before adding new address spaces to prevent incoherent string
6276 // values.
6277 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6278 Res.append("-ni:7:8:9");
6279 // Update ni:7 to ni:7:8:9.
6280 if (DL.ends_with("ni:7"))
6281 Res.append(":8:9");
6282 if (DL.ends_with("ni:7:8"))
6283 Res.append(":9");
6284
6285 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6286 // resources). An empty data layout has already been upgraded to G1 by now.
6287 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6288 Res.append("-p7:160:256:256:32");
6289 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6290 Res.append("-p8:128:128:128:48");
6291 constexpr StringRef OldP8("-p8:128:128-");
6292 if (DL.contains(OldP8))
6293 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6294 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6295 Res.append("-p9:192:256:256:32");
6296 }
6297
6298 // Upgrade the ELF mangling mode.
6299 if (!DL.contains("m:e"))
6300 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6301
6302 return Res;
6303 }
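// A worked (hypothetical) amdgcn example: starting from "e-p:64:64", the
// steps above yield
//   "m:e-e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32"
//       "-p8:128:128:128:48-p9:192:256:256:32"
// (shown wrapped here; the result is a single string).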
6304
6305 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6306 // If the datalayout matches the expected format, add pointer size address
6307 // spaces to the datalayout.
6308 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6309 if (!DL.contains(AddrSpaces)) {
6310 SmallVector<StringRef, 4> Groups;
6311 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6312 if (R.match(Res, &Groups))
6313 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6314 }
6315 };
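// Sketch of the lambda above on a common x86-64 layout prefix:
//   "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
// becomes
//   "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128".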
6316
6317 // AArch64 data layout upgrades.
6318 if (T.isAArch64()) {
6319 // Add "-Fn32"
6320 if (!DL.empty() && !DL.contains("-Fn32"))
6321 Res.append("-Fn32");
6322 AddPtr32Ptr64AddrSpaces();
6323 return Res;
6324 }
6325
6326 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6327 T.isWasm()) {
6328 // Mips64 with o32 ABI did not add "-i128:128".
6329 // Add "-i128:128"
6330 std::string I64 = "-i64:64";
6331 std::string I128 = "-i128:128";
6332 if (!StringRef(Res).contains(I128)) {
6333 size_t Pos = Res.find(I64);
6334 if (Pos != size_t(-1))
6335 Res.insert(Pos + I64.size(), I128);
6336 }
6337 return Res;
6338 }
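// E.g. a (hypothetical) "...-i64:64-n32:64-S128" becomes
//      "...-i64:64-i128:128-n32:64-S128".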
6339
6340 if (!T.isX86())
6341 return Res;
6342
6343 AddPtr32Ptr64AddrSpaces();
6344
6345 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6346 // for i128 operations prior to this being reflected in the data layout, and
6347 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6348 // boundaries, so although this is a breaking change, the upgrade is expected
6349 // to fix more IR than it breaks.
6350 // Intel MCU is an exception and uses 4-byte-alignment.
6351 if (!T.isOSIAMCU()) {
6352 std::string I128 = "-i128:128";
6353 if (StringRef Ref = Res; !Ref.contains(I128)) {
6354 SmallVector<StringRef, 4> Groups;
6355 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6356 if (R.match(Res, &Groups))
6357 Res = (Groups[1] + I128 + Groups[3]).str();
6358 }
6359 }
6360
6361 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6362 // Raising the alignment is safe because Clang did not produce f80 values in
6363 // the MSVC environment before this upgrade was added.
6364 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6365 StringRef Ref = Res;
6366 auto I = Ref.find("-f80:32-");
6367 if (I != StringRef::npos)
6368 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6369 }
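// E.g. for an i686-pc-windows-msvc triple, a layout containing "-f80:32-"
// becomes the same layout with "-f80:128-".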
6370
6371 return Res;
6372}
6373
6374void llvm::UpgradeAttributes(AttrBuilder &B) {
6375 StringRef FramePointer;
6376 Attribute A = B.getAttribute("no-frame-pointer-elim");
6377 if (A.isValid()) {
6378 // The value can be "true" or "false".
6379 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6380 B.removeAttribute("no-frame-pointer-elim");
6381 }
6382 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6383 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6384 if (FramePointer != "all")
6385 FramePointer = "non-leaf";
6386 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6387 }
6388 if (!FramePointer.empty())
6389 B.addAttribute("frame-pointer", FramePointer);
6390
6391 A = B.getAttribute("null-pointer-is-valid");
6392 if (A.isValid()) {
6393 // The value can be "true" or "false".
6394 bool NullPointerIsValid = A.getValueAsString() == "true";
6395 B.removeAttribute("null-pointer-is-valid");
6396 if (NullPointerIsValid)
6397 B.addAttribute(Attribute::NullPointerIsValid);
6398 }
6399}
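// Sketch of the attribute upgrades above:
//   "no-frame-pointer-elim"="true"   -> "frame-pointer"="all"
//   "no-frame-pointer-elim"="false"  -> "frame-pointer"="none"
//   "no-frame-pointer-elim-non-leaf" -> "frame-pointer"="non-leaf"
//   "null-pointer-is-valid"="true"   -> null_pointer_is_valid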
6400
6401void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6402 // clang.arc.attachedcall bundles are now required to have an operand.
6403 // If they don't, it's okay to drop them entirely: when there is an operand,
6404 // the "attachedcall" is meaningful and required, but without an operand,
6405 // it's just a marker NOP. Dropping it merely prevents an optimization.
6406 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6407 return OBD.getTag() == "clang.arc.attachedcall" &&
6408 OBD.inputs().empty();
6409 });
6410}
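// For example, a bundle list containing
//   [ "clang.arc.attachedcall"() ]
// is dropped entirely, while
//   [ "clang.arc.attachedcall"(ptr @llvm.objc.retainAutoreleasedReturnValue) ]
// is kept, since it has an operand.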