//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

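// Upgraded declarations are moved aside with a ".old" suffix so the new
// declaration can take the original name; calls to the renamed function are
// rewritten afterwards and the stale declaration is then erased.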
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
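// For example, a module holding the old declaration
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// gets it renamed here and rebound to the <2 x i64> form of the intrinsic.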
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
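// For example, the final immediate operand of llvm.x86.sse41.insertps was
// originally declared i32 even though only the low 8 bits are meaningful.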
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
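// The old declarations returned the comparison mask as a scalar integer; the
// upgraded intrinsics return one i1 per compared lane instead.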
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of multiply and add bytes intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i8.
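// For example, the multiplicands of the old vpdpbusd variants were declared
// as <N x i32>; the new declarations use <4*N x i8>, matching how the
// instruction actually consumes the bytes.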
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are vectors of i8.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of multiply and add words intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i16.
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are vectors of i16.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
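    // A null NewFn signals that there is no one-for-one replacement
    // declaration; such calls are expanded inline in UpgradeIntrinsicCall.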
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      // Added in 7.0
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MaskedFPCompare(F, ID, NewFn);
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.starts_with("vpdpwssd.") ||
               Name.starts_with("vpdpwssds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
               .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
               .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
               .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
               .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
               .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx512.*'.
  }

  if (Name.consume_front("avx2.")) {
    if (Name.consume_front("vpdpb")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
               .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
               .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
               .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.consume_front("vpdpw")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
               .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
               .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
               .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
               .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx2.*'
  }

  if (Name.consume_front("avx10.")) {
    if (Name.consume_front("vpdpb")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
               .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.consume_front("vpdpw")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
               .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
               .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx10.*'
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases({"v2f32.v8i8", "v4f32.v16i8"},
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};
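          // A vstN call carries a pointer, N data vectors and an alignment,
          // so fArgs.size() - 3 selects vst1..vst4; the lane variants take an
          // extra lane index, hence fArgs.size() - 5 below.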

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'.
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'.
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
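          // The leading digit of the name ('2', '3' or '4') selects the
          // corresponding sret load variant.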
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }

      if (Name.starts_with("rev.nxv")) {
        // 'aarch64.sve.rev.<Ty>'.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
        return true;
      }

      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    //     (and we upgrade it to use shared_cluster address-space[AS=7])
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    //     The last three parameters of the older version of these
    //     intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    //     The newer version reads as:
    //     arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    //     So, when the type of the [N-3]rd argument is "not i1", then
    //     it is the older version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec") ||
            Name.starts_with("cond.sub") || Name.starts_with("csub")) {
          // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
          // and usub_sat so there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .StartsWith("partial.reduce.add",
                          Intrinsic::vector_partial_reduce_add)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert ||
            ID == Intrinsic::vector_partial_reduce_add)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'l':
    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      Intrinsic::ID IID = Name.starts_with("lifetime.start")
                              ? Intrinsic::lifetime_start
                              : Intrinsic::lifetime_end;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                F->getArg(0)->getType());
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that had an
    // alignment parameter to embed the alignment as an attribute of
    // the pointer args.
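    // Roughly: the old five-operand form
    //   llvm.memcpy.p0i8.p0i8.i64(dst, src, len, i32 align, i1 volatile)
    // becomes the four-operand
    //   llvm.memcpy.p0.p0.i64(dst, src, len, i1 volatile)
    // with the alignment carried as align attributes on the pointer operands.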
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }

    unsigned MaskedID =
        StringSwitch<unsigned>(Name)
            .StartsWith("masked.load", Intrinsic::masked_load)
            .StartsWith("masked.gather", Intrinsic::masked_gather)
            .StartsWith("masked.store", Intrinsic::masked_store)
            .StartsWith("masked.scatter", Intrinsic::masked_scatter)
            .Default(0);
    if (MaskedID && F->arg_size() == 4) {
      rename(F);
      if (MaskedID == Intrinsic::masked_load ||
          MaskedID == Intrinsic::masked_gather) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), MaskedID,
            {F->getReturnType(), F->getArg(0)->getType()});
        return true;
      }
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), MaskedID,
          {F->getArg(0)->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // Upgrade Distributed Shared Memory Intrinsics
      Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // Upgrade TMA copy G2S Intrinsics
      IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
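      // For instance, a call to nvvm.bitcast.f2i is replaced by a plain
      // 'bitcast float %x to i32' instruction at the call site.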
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll,bf16,bf16x2}
        Expand =
            Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
      else if (Name.consume_front("fabs."))
        // nvvm.fabs.{f,ftz.f,d}
        Expand = Name == "f" || Name == "ftz.f" || Name == "d";
      else if (Name.consume_front("ex2.approx."))
        // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
        Expand =
            Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load."))
        // nvvm.atomic.load.add.{f32,f64}.p
        // nvvm.atomic.load.{inc,dec}.32.p
        Expand = StringSwitch<bool>(Name)
                     .StartsWith("add.f32.p", true)
                     .StartsWith("add.f64.p", true)
                     .StartsWith("inc.32.p", true)
                     .StartsWith("dec.32.p", true)
                     .Default(false);
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant,param}
        Expand = consumeNVVMPtrAddrSpace(Name);
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant,param}.to.gen
        Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = StringSwitch<bool>(Name)
                     .Case("barrier0", true)
                     .Case("barrier.n", true)
                     .Case("barrier.sync.cnt", true)
                     .Case("barrier.sync", true)
                     .Case("barrier", true)
                     .Case("bar.sync", true)
                     .Case("clz.ll", true)
                     .Case("popc.ll", true)
                     .Case("h2f", true)
                     .Case("swap.lo.hi.b64", true)
                     .Case("tanh.approx.f32", true)
                     .Default(false);

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
1637 case 'o':
1638 if (Name.starts_with("objectsize.")) {
1639 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1640 if (F->arg_size() == 2 || F->arg_size() == 3) {
1641 rename(F);
1642 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1643 Intrinsic::objectsize, Tys);
1644 return true;
1645 }
1646 }
1647 break;
1648
1649 case 'p':
1650 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1651 rename(F);
1652 NewFn = Intrinsic::getOrInsertDeclaration(
1653 F->getParent(), Intrinsic::ptr_annotation,
1654 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1655 return true;
1656 }
1657 break;
1658
1659 case 'r': {
1660 if (Name.consume_front("riscv.")) {
1663 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1664 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1665 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1666 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1669 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1670 rename(F);
1671 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1672 return true;
1673 }
1674 break; // No other applicable upgrades.
1675 }
1676
1677 ID = StringSwitch<Intrinsic::ID>(Name)
1678 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1679 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1682 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1683 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1684 rename(F);
1685 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1686 return true;
1687 }
1688 break; // No other applicable upgrades.
1689 }
1690
1691 ID = StringSwitch<Intrinsic::ID>(Name)
1692 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1693 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1694 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1695 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1696 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1697 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1700 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1701 rename(F);
1702 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1703 return true;
1704 }
1705 break; // No other applicable upgrades.
1706 }
1707 break; // No other 'riscv.*' intrinsics
1708 }
1709 } break;
1710
1711 case 's':
1712 if (Name == "stackprotectorcheck") {
1713 NewFn = nullptr;
1714 return true;
1715 }
1716 break;
1717
1718 case 't':
1719 if (Name == "thread.pointer") {
1721 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1722 return true;
1723 }
1724 break;
1725
1726 case 'v': {
1727 if (Name == "var.annotation" && F->arg_size() == 4) {
1728 rename(F);
1729 NewFn = Intrinsic::getOrInsertDeclaration(
1730 F->getParent(), Intrinsic::var_annotation,
1731 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1732 return true;
1733 }
1734 break;
1735 }
1736
1737 case 'w':
1738 if (Name.consume_front("wasm.")) {
1741 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1742 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1743 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1746 rename(F);
1747 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1748 F->getReturnType());
1749 return true;
1750 }
1751
1752 if (Name.consume_front("dot.i8x16.i7x16.")) {
1754 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1755 .Case("add.signed",
1756 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1757 .Default(Intrinsic::not_intrinsic);
1758 if (ID != Intrinsic::not_intrinsic) {
1759 rename(F);
1760 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1761 return true;
1762 }
1763 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1764 }
1765 break; // No other 'wasm.*'.
1766 }
1767 break;
1768
1769 case 'x':
1770 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1771 return true;
1772 }
1773
1774 auto *ST = dyn_cast<StructType>(F->getReturnType());
1775 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1776 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1777 // Replace return type with literal non-packed struct. Only do this for
1778 // intrinsics declared to return a struct, not for intrinsics with
1779 // overloaded return type, in which case the exact struct type will be
1780 // mangled into the name.
1781 SmallVector<Intrinsic::IITDescriptor> Desc;
1782 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1783 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1784 auto *FT = F->getFunctionType();
1785 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1786 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1787 std::string Name = F->getName().str();
1788 rename(F);
1789 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1790 Name, F->getParent());
1791
1792 // The new function may also need remangling.
1793 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1794 NewFn = *Result;
1795 return true;
1796 }
1797 }
1798
1799 // Remangle our intrinsic since we upgrade the mangling
1800 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1801 if (Result != std::nullopt) {
1802 NewFn = *Result;
1803 return true;
1804 }
1805
1806 // This may not belong here. This function is effectively being overloaded
1807 // to both detect an intrinsic which needs upgrading, and to provide the
1808 // upgraded form of the intrinsic. We should perhaps have two separate
1809 // functions for this.
1810 return false;
1811}
1812
1813bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1814 bool CanUpgradeDebugIntrinsicsToRecords) {
1815 NewFn = nullptr;
1816 bool Upgraded =
1817 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1818
1819 // Upgrade intrinsic attributes. This does not change the function.
1820 if (NewFn)
1821 F = NewFn;
1822 if (Intrinsic::ID id = F->getIntrinsicID()) {
1823 // Only do this if the intrinsic signature is valid.
1824 SmallVector<Type *> OverloadTys;
1825 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1826 F->setAttributes(
1827 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1828 }
1829 return Upgraded;
1830}
1831
1833 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1834 GV->getName() == "llvm.global_dtors")) ||
1835 !GV->hasInitializer())
1836 return nullptr;
1837 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1838 if (!ATy)
1839 return nullptr;
1840 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1841 if (!STy || STy->getNumElements() != 2)
1842 return nullptr;
1843
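// Upgrade two-field {i32, ptr} initializer entries to the current
// three-field {i32, ptr, ptr} form by appending a null associated-data
// pointer to each entry.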
1844 LLVMContext &C = GV->getContext();
1845 IRBuilder<> IRB(C);
1846 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1847 IRB.getPtrTy());
1848 Constant *Init = GV->getInitializer();
1849 unsigned N = Init->getNumOperands();
1850 std::vector<Constant *> NewCtors(N);
1851 for (unsigned i = 0; i != N; ++i) {
1852 auto Ctor = cast<Constant>(Init->getOperand(i));
1853 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1854 Ctor->getAggregateElement(1),
1855 Constant::getNullValue(IRB.getPtrTy()));
1856 }
1857 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1858
1859 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1860 NewInit, GV->getName());
1861}
1862
1863// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1864// to byte shuffles.
1865static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1866 unsigned Shift) {
1867 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1868 unsigned NumElts = ResultTy->getNumElements() * 8;
1869
1870 // Bitcast from a 64-bit element type to a byte element type.
1871 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1872 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1873
1874 // We'll be shuffling in zeroes.
1875 Value *Res = Constant::getNullValue(VecTy);
1876
1877 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1878 // we'll just return the zero vector.
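// For example, a 128-bit PSLLDQ by 3 builds indices {13, 14, 15, 16, ...,
// 28}: bytes 13-15 come from the zero vector, and the rest are bytes 0-12
// of Op (the second shuffle operand starts at index 16).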
1879 if (Shift < 16) {
1880 int Idxs[64];
1881 // 256/512-bit version is split into 2/4 16-byte lanes.
1882 for (unsigned l = 0; l != NumElts; l += 16)
1883 for (unsigned i = 0; i != 16; ++i) {
1884 unsigned Idx = NumElts + i - Shift;
1885 if (Idx < NumElts)
1886 Idx -= NumElts - 16; // end of lane, switch operand.
1887 Idxs[l + i] = Idx + l;
1888 }
1889
1890 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1891 }
1892
1893 // Bitcast back to a 64-bit element type.
1894 return Builder.CreateBitCast(Res, ResultTy, "cast");
1895}
1896
1897// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1898// to byte shuffles.
1899static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1900 unsigned Shift) {
1901 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1902 unsigned NumElts = ResultTy->getNumElements() * 8;
1903
1904 // Bitcast from a 64-bit element type to a byte element type.
1905 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1906 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1907
1908 // We'll be shuffling in zeroes.
1909 Value *Res = Constant::getNullValue(VecTy);
1910
1911 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1912 // we'll just return the zero vector.
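// For example, a 128-bit PSRLDQ by 3 builds indices {3, 4, ..., 15, 16,
// 17, 18}: bytes 3-15 of Op followed by three bytes of the zero vector
// (indices >= 16 select the second shuffle operand).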
1913 if (Shift < 16) {
1914 int Idxs[64];
1915 // 256/512-bit version is split into 2/4 16-byte lanes.
1916 for (unsigned l = 0; l != NumElts; l += 16)
1917 for (unsigned i = 0; i != 16; ++i) {
1918 unsigned Idx = i + Shift;
1919 if (Idx >= 16)
1920 Idx += NumElts - 16; // end of lane, switch operand.
1921 Idxs[l + i] = Idx + l;
1922 }
1923
1924 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1925 }
1926
1927 // Bitcast back to a 64-bit element type.
1928 return Builder.CreateBitCast(Res, ResultTy, "cast");
1929}
1930
1931static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1932 unsigned NumElts) {
1933 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1934 auto *MaskTy = FixedVectorType::get(
1935 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1936 Mask = Builder.CreateBitCast(Mask, MaskTy);
1937
1938 // If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
1939 // i8 and we need to extract down to the right number of elements.
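// e.g. an i8 mask with NumElts == 4 is bitcast to <8 x i1> and then
// narrowed to <4 x i1> with the identity shuffle {0, 1, 2, 3}.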
1940 if (NumElts <= 4) {
1941 int Indices[4];
1942 for (unsigned i = 0; i != NumElts; ++i)
1943 Indices[i] = i;
1944 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1945 "extract");
1946 }
1947
1948 return Mask;
1949}
1950
1951static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1952 Value *Op1) {
1953 // If the mask is all ones just emit the first operation.
1954 if (const auto *C = dyn_cast<Constant>(Mask))
1955 if (C->isAllOnesValue())
1956 return Op0;
1957
1958 Mask = getX86MaskVec(Builder, Mask,
1959 cast<FixedVectorType>(Op0->getType())->getNumElements());
1960 return Builder.CreateSelect(Mask, Op0, Op1);
1961}
1962
1963static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1964 Value *Op1) {
1965 // If the mask is all ones just emit the first operation.
1966 if (const auto *C = dyn_cast<Constant>(Mask))
1967 if (C->isAllOnesValue())
1968 return Op0;
1969
1970 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1971 Mask->getType()->getIntegerBitWidth());
1972 Mask = Builder.CreateBitCast(Mask, MaskTy);
1973 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1974 return Builder.CreateSelect(Mask, Op0, Op1);
1975}
1976
1977// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1978// PALIGNR handles large immediates by shifting in zeroes, while VALIGN masks
1979// the immediate, so both cases need handling. VALIGN also has no 128-bit lanes.
1980static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1981 Value *Op1, Value *Shift,
1982 Value *Passthru, Value *Mask,
1983 bool IsVALIGN) {
1984 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1985
1986 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1987 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1988 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1989 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1990
1991 // Mask the immediate for VALIGN.
1992 if (IsVALIGN)
1993 ShiftVal &= (NumElts - 1);
1994
1995 // If palignr is shifting the pair of vectors more than the size of two
1996 // lanes, emit zero.
1997 if (ShiftVal >= 32)
1998 return llvm::Constant::getNullValue(Op0->getType());
1999
2000 // If palignr is shifting the pair of input vectors more than one lane,
2001 // but less than two lanes, convert to shifting in zeroes.
2002 if (ShiftVal > 16) {
2003 ShiftVal -= 16;
2004 Op1 = Op0;
2005 Op0 = llvm::Constant::getNullValue(Op0->getType());
2006 }
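// e.g. on 128-bit vectors a shift of 20 becomes a shift of 4 with
// Op1 = Op0 and Op0 = zero: bytes 4-15 of the original Op0 followed by
// four zero bytes.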
2007
2008 int Indices[64];
2009 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2010 for (unsigned l = 0; l < NumElts; l += 16) {
2011 for (unsigned i = 0; i != 16; ++i) {
2012 unsigned Idx = ShiftVal + i;
2013 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2014 Idx += NumElts - 16; // End of lane, switch operand.
2015 Indices[l + i] = Idx + l;
2016 }
2017 }
2018
2019 Value *Align = Builder.CreateShuffleVector(
2020 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2021
2022 return emitX86Select(Builder, Mask, Align, Passthru);
2023}
2024
2025static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
2026 bool ZeroMask, bool IndexForm) {
2027 Type *Ty = CI.getType();
2028 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2029 unsigned EltWidth = Ty->getScalarSizeInBits();
2030 bool IsFloat = Ty->isFPOrFPVectorTy();
2031 Intrinsic::ID IID;
2032 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2033 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2034 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2035 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2036 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2037 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2038 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2039 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2040 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2041 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2042 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2043 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2044 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2045 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2046 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2047 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2048 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2049 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2050 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2051 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2052 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2053 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2054 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2055 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2056 else if (VecWidth == 128 && EltWidth == 16)
2057 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2058 else if (VecWidth == 256 && EltWidth == 16)
2059 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2060 else if (VecWidth == 512 && EltWidth == 16)
2061 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2062 else if (VecWidth == 128 && EltWidth == 8)
2063 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2064 else if (VecWidth == 256 && EltWidth == 8)
2065 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2066 else if (VecWidth == 512 && EltWidth == 8)
2067 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2068 else
2069 llvm_unreachable("Unexpected intrinsic");
2070
2071 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2072 CI.getArgOperand(2) };
2073
2074 // If this isn't index form we need to swap operands 0 and 1.
2075 if (!IndexForm)
2076 std::swap(Args[0], Args[1]);
2077
2078 Value *V = Builder.CreateIntrinsic(IID, Args);
2079 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2080 : Builder.CreateBitCast(CI.getArgOperand(1),
2081 Ty);
2082 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2083}
2084
2085static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2086 Intrinsic::ID IID) {
2087 Type *Ty = CI.getType();
2088 Value *Op0 = CI.getOperand(0);
2089 Value *Op1 = CI.getOperand(1);
2090 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2091
2092 if (CI.arg_size() == 4) { // For masked intrinsics.
2093 Value *VecSrc = CI.getOperand(2);
2094 Value *Mask = CI.getOperand(3);
2095 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2096 }
2097 return Res;
2098}
2099
2100static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2101 bool IsRotateRight) {
2102 Type *Ty = CI.getType();
2103 Value *Src = CI.getArgOperand(0);
2104 Value *Amt = CI.getArgOperand(1);
2105
2106 // Amount may be a scalar immediate, in which case create a splat vector.
2107 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2108 // we only care about the lowest log2 bits anyway.
2109 if (Amt->getType() != Ty) {
2110 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2111 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2112 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2113 }
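// A rotate is a funnel shift with both inputs equal: fshl(x, x, n)
// rotates x left by n (mod the bit width), e.g. fshl(i32 x, x, 8) ==
// (x << 8) | (x >> 24).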
2114
2115 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2116 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2117
2118 if (CI.arg_size() == 4) { // For masked intrinsics.
2119 Value *VecSrc = CI.getOperand(2);
2120 Value *Mask = CI.getOperand(3);
2121 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2122 }
2123 return Res;
2124}
2125
2126static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2127 bool IsSigned) {
2128 Type *Ty = CI.getType();
2129 Value *LHS = CI.getArgOperand(0);
2130 Value *RHS = CI.getArgOperand(1);
2131
2132 CmpInst::Predicate Pred;
2133 switch (Imm) {
2134 case 0x0:
2135 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2136 break;
2137 case 0x1:
2138 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2139 break;
2140 case 0x2:
2141 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2142 break;
2143 case 0x3:
2144 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2145 break;
2146 case 0x4:
2147 Pred = ICmpInst::ICMP_EQ;
2148 break;
2149 case 0x5:
2150 Pred = ICmpInst::ICMP_NE;
2151 break;
2152 case 0x6:
2153 return Constant::getNullValue(Ty); // FALSE
2154 case 0x7:
2155 return Constant::getAllOnesValue(Ty); // TRUE
2156 default:
2157 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2158 }
2159
2160 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2161 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2162 return Ext;
2163}
2164
2165static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2166 bool IsShiftRight, bool ZeroMask) {
2167 Type *Ty = CI.getType();
2168 Value *Op0 = CI.getArgOperand(0);
2169 Value *Op1 = CI.getArgOperand(1);
2170 Value *Amt = CI.getArgOperand(2);
2171
2172 if (IsShiftRight)
2173 std::swap(Op0, Op1);
2174
2175 // Amount may be a scalar immediate, in which case create a splat vector.
2176 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2177 // we only care about the lowest log2 bits anyway.
2178 if (Amt->getType() != Ty) {
2179 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2180 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2181 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2182 }
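// fshl(a, b, n) selects the top bits of the double-width value (a:b)
// shifted left by n, which matches VPSHLD lane-wise; VPSHRD maps to fshr
// with the operands swapped above.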
2183
2184 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2185 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2186
2187 unsigned NumArgs = CI.arg_size();
2188 if (NumArgs >= 4) { // For masked intrinsics.
2189 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2190 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2191 CI.getArgOperand(0);
2192 Value *Mask = CI.getOperand(NumArgs - 1);
2193 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2194 }
2195 return Res;
2196}
2197
2198static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2199 Value *Mask, bool Aligned) {
2200 const Align Alignment =
2201 Aligned
2202 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2203 : Align(1);
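// e.g. a 512-bit store upgraded from an aligned variant gets Align(64);
// the unaligned variants always use Align(1).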
2204
2205 // If the mask is all ones just emit a regular store.
2206 if (const auto *C = dyn_cast<Constant>(Mask))
2207 if (C->isAllOnesValue())
2208 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2209
2210 // Convert the mask from an integer type to a vector of i1.
2211 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2212 Mask = getX86MaskVec(Builder, Mask, NumElts);
2213 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2214}
2215
2216static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2217 Value *Passthru, Value *Mask, bool Aligned) {
2218 Type *ValTy = Passthru->getType();
2219 const Align Alignment =
2220 Aligned
2221 ? Align(
2222 ValTy->getPrimitiveSizeInBits().getFixedValue() /
2223 8)
2224 : Align(1);
2225
2226 // If the mask is all ones just emit a regular load.
2227 if (const auto *C = dyn_cast<Constant>(Mask))
2228 if (C->isAllOnesValue())
2229 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2230
2231 // Convert the mask from an integer type to a vector of i1.
2232 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2233 Mask = getX86MaskVec(Builder, Mask, NumElts);
2234 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2235}
2236
2237static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2238 Type *Ty = CI.getType();
2239 Value *Op0 = CI.getArgOperand(0);
2240 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2241 {Op0, Builder.getInt1(false)});
2242 if (CI.arg_size() == 3)
2243 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2244 return Res;
2245}
2246
2247static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2248 Type *Ty = CI.getType();
2249
2250 // Arguments have a vXi32 type so cast to vXi64.
2251 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2252 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2253
2254 if (IsSigned) {
2255 // Shift left then arithmetic shift right.
2256 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2257 LHS = Builder.CreateShl(LHS, ShiftAmt);
2258 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2259 RHS = Builder.CreateShl(RHS, ShiftAmt);
2260 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2261 } else {
2262 // Clear the upper bits.
2263 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2264 LHS = Builder.CreateAnd(LHS, Mask);
2265 RHS = Builder.CreateAnd(RHS, Mask);
2266 }
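// With the upper 32 bits of each lane sign- or zero-filled, a plain
// 64-bit multiply now produces the same product as the hardware
// PMULDQ/PMULUDQ 32x32->64 operation on the even dword lanes.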
2267
2268 Value *Res = Builder.CreateMul(LHS, RHS);
2269
2270 if (CI.arg_size() == 4)
2271 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2272
2273 return Res;
2274}
2275
2276// Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
2277static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2278 Value *Mask) {
2279 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2280 if (Mask) {
2281 const auto *C = dyn_cast<Constant>(Mask);
2282 if (!C || !C->isAllOnesValue())
2283 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2284 }
2285
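// e.g. a <4 x i1> result is widened to <8 x i1> by shuffling in zero
// upper lanes (taken from the null vector) so it can be bitcast to i8.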
2286 if (NumElts < 8) {
2287 int Indices[8];
2288 for (unsigned i = 0; i != NumElts; ++i)
2289 Indices[i] = i;
2290 for (unsigned i = NumElts; i != 8; ++i)
2291 Indices[i] = NumElts + i % NumElts;
2292 Vec = Builder.CreateShuffleVector(Vec,
2293 Constant::getNullValue(Vec->getType()),
2294 Indices);
2295 }
2296 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2297}
2298
2299static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2300 unsigned CC, bool Signed) {
2301 Value *Op0 = CI.getArgOperand(0);
2302 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2303
2304 Value *Cmp;
2305 if (CC == 3) {
2306 Cmp = Constant::getNullValue(
2307 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2308 } else if (CC == 7) {
2309 Cmp = Constant::getAllOnesValue(
2310 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2311 } else {
2312 ICmpInst::Predicate Pred;
2313 switch (CC) {
2314 default: llvm_unreachable("Unknown condition code");
2315 case 0: Pred = ICmpInst::ICMP_EQ; break;
2316 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2317 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2318 case 4: Pred = ICmpInst::ICMP_NE; break;
2319 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2320 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2321 }
2322 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2323 }
2324
2325 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2326
2327 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2328}
2329
2330// Replace a masked intrinsic with an older unmasked intrinsic.
2331static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2332 Intrinsic::ID IID) {
2333 Value *Rep =
2334 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2335 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2336}
2337
2338static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2339 Value* A = CI.getArgOperand(0);
2340 Value* B = CI.getArgOperand(1);
2341 Value* Src = CI.getArgOperand(2);
2342 Value* Mask = CI.getArgOperand(3);
2343
2344 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2345 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2346 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2347 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2348 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2349 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2350}
2351
2352static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2353 Value* Op = CI.getArgOperand(0);
2354 Type* ReturnOp = CI.getType();
2355 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2356 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2357 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2358}
2359
2360// Replace intrinsic with unmasked version and a select.
2361static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2362 CallBase &CI, Value *&Rep) {
2363 Name = Name.substr(12); // Remove avx512.mask.
2364
2365 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2366 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2367 Intrinsic::ID IID;
2368 if (Name.starts_with("max.p")) {
2369 if (VecWidth == 128 && EltWidth == 32)
2370 IID = Intrinsic::x86_sse_max_ps;
2371 else if (VecWidth == 128 && EltWidth == 64)
2372 IID = Intrinsic::x86_sse2_max_pd;
2373 else if (VecWidth == 256 && EltWidth == 32)
2374 IID = Intrinsic::x86_avx_max_ps_256;
2375 else if (VecWidth == 256 && EltWidth == 64)
2376 IID = Intrinsic::x86_avx_max_pd_256;
2377 else
2378 llvm_unreachable("Unexpected intrinsic");
2379 } else if (Name.starts_with("min.p")) {
2380 if (VecWidth == 128 && EltWidth == 32)
2381 IID = Intrinsic::x86_sse_min_ps;
2382 else if (VecWidth == 128 && EltWidth == 64)
2383 IID = Intrinsic::x86_sse2_min_pd;
2384 else if (VecWidth == 256 && EltWidth == 32)
2385 IID = Intrinsic::x86_avx_min_ps_256;
2386 else if (VecWidth == 256 && EltWidth == 64)
2387 IID = Intrinsic::x86_avx_min_pd_256;
2388 else
2389 llvm_unreachable("Unexpected intrinsic");
2390 } else if (Name.starts_with("pshuf.b.")) {
2391 if (VecWidth == 128)
2392 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2393 else if (VecWidth == 256)
2394 IID = Intrinsic::x86_avx2_pshuf_b;
2395 else if (VecWidth == 512)
2396 IID = Intrinsic::x86_avx512_pshuf_b_512;
2397 else
2398 llvm_unreachable("Unexpected intrinsic");
2399 } else if (Name.starts_with("pmul.hr.sw.")) {
2400 if (VecWidth == 128)
2401 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2402 else if (VecWidth == 256)
2403 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2404 else if (VecWidth == 512)
2405 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2406 else
2407 llvm_unreachable("Unexpected intrinsic");
2408 } else if (Name.starts_with("pmulh.w.")) {
2409 if (VecWidth == 128)
2410 IID = Intrinsic::x86_sse2_pmulh_w;
2411 else if (VecWidth == 256)
2412 IID = Intrinsic::x86_avx2_pmulh_w;
2413 else if (VecWidth == 512)
2414 IID = Intrinsic::x86_avx512_pmulh_w_512;
2415 else
2416 llvm_unreachable("Unexpected intrinsic");
2417 } else if (Name.starts_with("pmulhu.w.")) {
2418 if (VecWidth == 128)
2419 IID = Intrinsic::x86_sse2_pmulhu_w;
2420 else if (VecWidth == 256)
2421 IID = Intrinsic::x86_avx2_pmulhu_w;
2422 else if (VecWidth == 512)
2423 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2424 else
2425 llvm_unreachable("Unexpected intrinsic");
2426 } else if (Name.starts_with("pmaddw.d.")) {
2427 if (VecWidth == 128)
2428 IID = Intrinsic::x86_sse2_pmadd_wd;
2429 else if (VecWidth == 256)
2430 IID = Intrinsic::x86_avx2_pmadd_wd;
2431 else if (VecWidth == 512)
2432 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2433 else
2434 llvm_unreachable("Unexpected intrinsic");
2435 } else if (Name.starts_with("pmaddubs.w.")) {
2436 if (VecWidth == 128)
2437 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2438 else if (VecWidth == 256)
2439 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2440 else if (VecWidth == 512)
2441 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2442 else
2443 llvm_unreachable("Unexpected intrinsic");
2444 } else if (Name.starts_with("packsswb.")) {
2445 if (VecWidth == 128)
2446 IID = Intrinsic::x86_sse2_packsswb_128;
2447 else if (VecWidth == 256)
2448 IID = Intrinsic::x86_avx2_packsswb;
2449 else if (VecWidth == 512)
2450 IID = Intrinsic::x86_avx512_packsswb_512;
2451 else
2452 llvm_unreachable("Unexpected intrinsic");
2453 } else if (Name.starts_with("packssdw.")) {
2454 if (VecWidth == 128)
2455 IID = Intrinsic::x86_sse2_packssdw_128;
2456 else if (VecWidth == 256)
2457 IID = Intrinsic::x86_avx2_packssdw;
2458 else if (VecWidth == 512)
2459 IID = Intrinsic::x86_avx512_packssdw_512;
2460 else
2461 llvm_unreachable("Unexpected intrinsic");
2462 } else if (Name.starts_with("packuswb.")) {
2463 if (VecWidth == 128)
2464 IID = Intrinsic::x86_sse2_packuswb_128;
2465 else if (VecWidth == 256)
2466 IID = Intrinsic::x86_avx2_packuswb;
2467 else if (VecWidth == 512)
2468 IID = Intrinsic::x86_avx512_packuswb_512;
2469 else
2470 llvm_unreachable("Unexpected intrinsic");
2471 } else if (Name.starts_with("packusdw.")) {
2472 if (VecWidth == 128)
2473 IID = Intrinsic::x86_sse41_packusdw;
2474 else if (VecWidth == 256)
2475 IID = Intrinsic::x86_avx2_packusdw;
2476 else if (VecWidth == 512)
2477 IID = Intrinsic::x86_avx512_packusdw_512;
2478 else
2479 llvm_unreachable("Unexpected intrinsic");
2480 } else if (Name.starts_with("vpermilvar.")) {
2481 if (VecWidth == 128 && EltWidth == 32)
2482 IID = Intrinsic::x86_avx_vpermilvar_ps;
2483 else if (VecWidth == 128 && EltWidth == 64)
2484 IID = Intrinsic::x86_avx_vpermilvar_pd;
2485 else if (VecWidth == 256 && EltWidth == 32)
2486 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2487 else if (VecWidth == 256 && EltWidth == 64)
2488 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2489 else if (VecWidth == 512 && EltWidth == 32)
2490 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2491 else if (VecWidth == 512 && EltWidth == 64)
2492 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2493 else
2494 llvm_unreachable("Unexpected intrinsic");
2495 } else if (Name == "cvtpd2dq.256") {
2496 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2497 } else if (Name == "cvtpd2ps.256") {
2498 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2499 } else if (Name == "cvttpd2dq.256") {
2500 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2501 } else if (Name == "cvttps2dq.128") {
2502 IID = Intrinsic::x86_sse2_cvttps2dq;
2503 } else if (Name == "cvttps2dq.256") {
2504 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2505 } else if (Name.starts_with("permvar.")) {
2506 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2507 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2508 IID = Intrinsic::x86_avx2_permps;
2509 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2510 IID = Intrinsic::x86_avx2_permd;
2511 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2512 IID = Intrinsic::x86_avx512_permvar_df_256;
2513 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2514 IID = Intrinsic::x86_avx512_permvar_di_256;
2515 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2516 IID = Intrinsic::x86_avx512_permvar_sf_512;
2517 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2518 IID = Intrinsic::x86_avx512_permvar_si_512;
2519 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2520 IID = Intrinsic::x86_avx512_permvar_df_512;
2521 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2522 IID = Intrinsic::x86_avx512_permvar_di_512;
2523 else if (VecWidth == 128 && EltWidth == 16)
2524 IID = Intrinsic::x86_avx512_permvar_hi_128;
2525 else if (VecWidth == 256 && EltWidth == 16)
2526 IID = Intrinsic::x86_avx512_permvar_hi_256;
2527 else if (VecWidth == 512 && EltWidth == 16)
2528 IID = Intrinsic::x86_avx512_permvar_hi_512;
2529 else if (VecWidth == 128 && EltWidth == 8)
2530 IID = Intrinsic::x86_avx512_permvar_qi_128;
2531 else if (VecWidth == 256 && EltWidth == 8)
2532 IID = Intrinsic::x86_avx512_permvar_qi_256;
2533 else if (VecWidth == 512 && EltWidth == 8)
2534 IID = Intrinsic::x86_avx512_permvar_qi_512;
2535 else
2536 llvm_unreachable("Unexpected intrinsic");
2537 } else if (Name.starts_with("dbpsadbw.")) {
2538 if (VecWidth == 128)
2539 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2540 else if (VecWidth == 256)
2541 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2542 else if (VecWidth == 512)
2543 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2544 else
2545 llvm_unreachable("Unexpected intrinsic");
2546 } else if (Name.starts_with("pmultishift.qb.")) {
2547 if (VecWidth == 128)
2548 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2549 else if (VecWidth == 256)
2550 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2551 else if (VecWidth == 512)
2552 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2553 else
2554 llvm_unreachable("Unexpected intrinsic");
2555 } else if (Name.starts_with("conflict.")) {
2556 if (Name[9] == 'd' && VecWidth == 128)
2557 IID = Intrinsic::x86_avx512_conflict_d_128;
2558 else if (Name[9] == 'd' && VecWidth == 256)
2559 IID = Intrinsic::x86_avx512_conflict_d_256;
2560 else if (Name[9] == 'd' && VecWidth == 512)
2561 IID = Intrinsic::x86_avx512_conflict_d_512;
2562 else if (Name[9] == 'q' && VecWidth == 128)
2563 IID = Intrinsic::x86_avx512_conflict_q_128;
2564 else if (Name[9] == 'q' && VecWidth == 256)
2565 IID = Intrinsic::x86_avx512_conflict_q_256;
2566 else if (Name[9] == 'q' && VecWidth == 512)
2567 IID = Intrinsic::x86_avx512_conflict_q_512;
2568 else
2569 llvm_unreachable("Unexpected intrinsic");
2570 } else if (Name.starts_with("pavg.")) {
2571 if (Name[5] == 'b' && VecWidth == 128)
2572 IID = Intrinsic::x86_sse2_pavg_b;
2573 else if (Name[5] == 'b' && VecWidth == 256)
2574 IID = Intrinsic::x86_avx2_pavg_b;
2575 else if (Name[5] == 'b' && VecWidth == 512)
2576 IID = Intrinsic::x86_avx512_pavg_b_512;
2577 else if (Name[5] == 'w' && VecWidth == 128)
2578 IID = Intrinsic::x86_sse2_pavg_w;
2579 else if (Name[5] == 'w' && VecWidth == 256)
2580 IID = Intrinsic::x86_avx2_pavg_w;
2581 else if (Name[5] == 'w' && VecWidth == 512)
2582 IID = Intrinsic::x86_avx512_pavg_w_512;
2583 else
2584 llvm_unreachable("Unexpected intrinsic");
2585 } else
2586 return false;
2587
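// Drop the passthru and mask operands (the last two), call the unmasked
// intrinsic, then reapply the mask as a vector select.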
2588 SmallVector<Value *, 4> Args(CI.args());
2589 Args.pop_back();
2590 Args.pop_back();
2591 Rep = Builder.CreateIntrinsic(IID, Args);
2592 unsigned NumArgs = CI.arg_size();
2593 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2594 CI.getArgOperand(NumArgs - 2));
2595 return true;
2596}
2597
2598/// Upgrade comment in call to inline asm that represents an objc retain release
2599/// marker.
2600void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2601 size_t Pos;
2602 if (AsmStr->find("mov\tfp") == 0 &&
2603 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2604 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2605 AsmStr->replace(Pos, 1, ";");
2606 }
2607}
2608
2609static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2610 Function *F, IRBuilder<> &Builder) {
2611 Value *Rep = nullptr;
2612
2613 if (Name == "abs.i" || Name == "abs.ll") {
2614 Value *Arg = CI->getArgOperand(0);
2615 Value *Neg = Builder.CreateNeg(Arg, "neg");
2616 Value *Cmp = Builder.CreateICmpSGE(
2617 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2618 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2619 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2620 Type *Ty = (Name == "abs.bf16")
2621 ? Builder.getBFloatTy()
2622 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2623 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2624 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2625 Rep = Builder.CreateBitCast(Abs, CI->getType());
2626 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2627 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2628 : Intrinsic::nvvm_fabs;
2629 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2630 } else if (Name.consume_front("ex2.approx.")) {
2631 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2632 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2633 : Intrinsic::nvvm_ex2_approx;
2634 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2635 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2636 Name.starts_with("atomic.load.add.f64.p")) {
2637 Value *Ptr = CI->getArgOperand(0);
2638 Value *Val = CI->getArgOperand(1);
2639 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2640 AtomicOrdering::SequentiallyConsistent);
2641 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2642 Name.starts_with("atomic.load.dec.32.p")) {
2643 Value *Ptr = CI->getArgOperand(0);
2644 Value *Val = CI->getArgOperand(1);
2645 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2646 : AtomicRMWInst::UDecWrap;
2647 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2648 AtomicOrdering::SequentiallyConsistent);
2649 } else if (Name.consume_front("max.") &&
2650 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2651 Name == "ui" || Name == "ull")) {
2652 Value *Arg0 = CI->getArgOperand(0);
2653 Value *Arg1 = CI->getArgOperand(1);
2654 Value *Cmp = Name.starts_with("u")
2655 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2656 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2657 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2658 } else if (Name.consume_front("min.") &&
2659 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2660 Name == "ui" || Name == "ull")) {
2661 Value *Arg0 = CI->getArgOperand(0);
2662 Value *Arg1 = CI->getArgOperand(1);
2663 Value *Cmp = Name.starts_with("u")
2664 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2665 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2666 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2667 } else if (Name == "clz.ll") {
2668 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2669 Value *Arg = CI->getArgOperand(0);
2670 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2671 {Arg, Builder.getFalse()},
2672 /*FMFSource=*/nullptr, "ctlz");
2673 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2674 } else if (Name == "popc.ll") {
2675 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2676 // i64.
2677 Value *Arg = CI->getArgOperand(0);
2678 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2679 Arg, /*FMFSource=*/nullptr, "ctpop");
2680 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2681 } else if (Name == "h2f") {
2682 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2683 {Builder.getFloatTy()}, CI->getArgOperand(0),
2684 /*FMFSource=*/nullptr, "h2f");
2685 } else if (Name.consume_front("bitcast.") &&
2686 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2687 Name == "d2ll")) {
2688 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2689 } else if (Name == "rotate.b32") {
2690 Value *Arg = CI->getOperand(0);
2691 Value *ShiftAmt = CI->getOperand(1);
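// rotate.b32(x, n) is exactly a 32-bit funnel shift with both inputs
// equal: fshl(x, x, n) == (x << n) | (x >> (32 - n)) for n % 32 != 0.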
2692 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2693 {Arg, Arg, ShiftAmt});
2694 } else if (Name == "rotate.b64") {
2695 Type *Int64Ty = Builder.getInt64Ty();
2696 Value *Arg = CI->getOperand(0);
2697 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2698 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2699 {Arg, Arg, ZExtShiftAmt});
2700 } else if (Name == "rotate.right.b64") {
2701 Type *Int64Ty = Builder.getInt64Ty();
2702 Value *Arg = CI->getOperand(0);
2703 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2704 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2705 {Arg, Arg, ZExtShiftAmt});
2706 } else if (Name == "swap.lo.hi.b64") {
2707 Type *Int64Ty = Builder.getInt64Ty();
2708 Value *Arg = CI->getOperand(0);
2709 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2710 {Arg, Arg, Builder.getInt64(32)});
2711 } else if ((Name.consume_front("ptr.gen.to.") &&
2712 consumeNVVMPtrAddrSpace(Name)) ||
2713 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2714 Name.starts_with(".to.gen"))) {
2715 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2716 } else if (Name.consume_front("ldg.global")) {
2717 Value *Ptr = CI->getArgOperand(0);
2718 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2719 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2720 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2721 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2722 MDNode *MD = MDNode::get(Builder.getContext(), {});
2723 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2724 return LD;
2725 } else if (Name == "tanh.approx.f32") {
2726 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2727 FastMathFlags FMF;
2728 FMF.setApproxFunc();
2729 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2730 FMF);
2731 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2732 Value *Arg =
2733 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2734 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2735 {}, {Arg});
2736 } else if (Name == "barrier") {
2737 Rep = Builder.CreateIntrinsic(
2738 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2739 {CI->getArgOperand(0), CI->getArgOperand(1)});
2740 } else if (Name == "barrier.sync") {
2741 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2742 {CI->getArgOperand(0)});
2743 } else if (Name == "barrier.sync.cnt") {
2744 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2745 {CI->getArgOperand(0), CI->getArgOperand(1)});
2746 } else {
2747 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2748 if (IID != Intrinsic::not_intrinsic &&
2749 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2750 rename(F);
2751 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2752 SmallVector<Value *, 2> Args;
2753 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2754 Value *Arg = CI->getArgOperand(I);
2755 Type *OldType = Arg->getType();
2756 Type *NewType = NewFn->getArg(I)->getType();
2757 Args.push_back(
2758 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2759 ? Builder.CreateBitCast(Arg, NewType)
2760 : Arg);
2761 }
2762 Rep = Builder.CreateCall(NewFn, Args);
2763 if (F->getReturnType()->isIntegerTy())
2764 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2765 }
2766 }
2767
2768 return Rep;
2769}
2770
2771static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2772 IRBuilder<> &Builder) {
2773 LLVMContext &C = F->getContext();
2774 Value *Rep = nullptr;
2775
2776 if (Name.starts_with("sse4a.movnt.")) {
2778 Elts.push_back(
2779 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2780 MDNode *Node = MDNode::get(C, Elts);
2781
2782 Value *Arg0 = CI->getArgOperand(0);
2783 Value *Arg1 = CI->getArgOperand(1);
2784
2785 // Nontemporal (unaligned) store of the 0th element of the float/double
2786 // vector.
2787 Value *Extract =
2788 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2789
2790 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2791 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2792 } else if (Name.starts_with("avx.movnt.") ||
2793 Name.starts_with("avx512.storent.")) {
2795 Elts.push_back(
2796 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2797 MDNode *Node = MDNode::get(C, Elts);
2798
2799 Value *Arg0 = CI->getArgOperand(0);
2800 Value *Arg1 = CI->getArgOperand(1);
2801
2802 StoreInst *SI = Builder.CreateAlignedStore(
2803 Arg1, Arg0,
2804 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2805 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2806 } else if (Name == "sse2.storel.dq") {
2807 Value *Arg0 = CI->getArgOperand(0);
2808 Value *Arg1 = CI->getArgOperand(1);
2809
2810 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2811 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2812 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2813 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2814 } else if (Name.starts_with("sse.storeu.") ||
2815 Name.starts_with("sse2.storeu.") ||
2816 Name.starts_with("avx.storeu.")) {
2817 Value *Arg0 = CI->getArgOperand(0);
2818 Value *Arg1 = CI->getArgOperand(1);
2819 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2820 } else if (Name == "avx512.mask.store.ss") {
2821 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2822 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2823 Mask, false);
2824 } else if (Name.starts_with("avx512.mask.store")) {
2825 // "avx512.mask.storeu." or "avx512.mask.store."
2826 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2827 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2828 CI->getArgOperand(2), Aligned);
2829 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2830 // Upgrade packed integer vector compare intrinsics to compare instructions.
2831 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2832 bool CmpEq = Name[9] == 'e';
2833 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2834 CI->getArgOperand(0), CI->getArgOperand(1));
2835 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2836 } else if (Name.starts_with("avx512.broadcastm")) {
2837 Type *ExtTy = Type::getInt32Ty(C);
2838 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2839 ExtTy = Type::getInt64Ty(C);
2840 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2841 ExtTy->getPrimitiveSizeInBits();
2842 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2843 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2844 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2845 Value *Vec = CI->getArgOperand(0);
2846 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2847 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2848 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2849 } else if (Name.starts_with("avx.sqrt.p") ||
2850 Name.starts_with("sse2.sqrt.p") ||
2851 Name.starts_with("sse.sqrt.p")) {
2852 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2853 {CI->getArgOperand(0)});
2854 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2855 if (CI->arg_size() == 4 &&
2856 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2857 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2858 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2859 : Intrinsic::x86_avx512_sqrt_pd_512;
2860
2861 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2862 Rep = Builder.CreateIntrinsic(IID, Args);
2863 } else {
2864 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2865 {CI->getArgOperand(0)});
2866 }
2867 Rep =
2868 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2869 } else if (Name.starts_with("avx512.ptestm") ||
2870 Name.starts_with("avx512.ptestnm")) {
2871 Value *Op0 = CI->getArgOperand(0);
2872 Value *Op1 = CI->getArgOperand(1);
2873 Value *Mask = CI->getArgOperand(2);
2874 Rep = Builder.CreateAnd(Op0, Op1);
2875 llvm::Type *Ty = Op0->getType();
2876 Constant *Zero = llvm::Constant::getNullValue(Ty);
2877 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2878 ? ICmpInst::ICMP_NE
2879 : ICmpInst::ICMP_EQ;
2880 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2881 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2882 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2883 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2884 ->getNumElements();
2885 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2886 Rep =
2887 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2888 } else if (Name.starts_with("avx512.kunpck")) {
2889 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2890 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2891 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2892 int Indices[64];
2893 for (unsigned i = 0; i != NumElts; ++i)
2894 Indices[i] = i;
2895
2896 // First extract half of each vector. This gives better codegen than
2897 // doing it in a single shuffle.
2898 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2899 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2900 // Concat the vectors.
2901 // NOTE: Operands have to be swapped to match intrinsic definition.
2902 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2903 Rep = Builder.CreateBitCast(Rep, CI->getType());
2904 } else if (Name == "avx512.kand.w") {
2905 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2906 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2907 Rep = Builder.CreateAnd(LHS, RHS);
2908 Rep = Builder.CreateBitCast(Rep, CI->getType());
2909 } else if (Name == "avx512.kandn.w") {
2910 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2911 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2912 LHS = Builder.CreateNot(LHS);
2913 Rep = Builder.CreateAnd(LHS, RHS);
2914 Rep = Builder.CreateBitCast(Rep, CI->getType());
2915 } else if (Name == "avx512.kor.w") {
2916 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2917 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2918 Rep = Builder.CreateOr(LHS, RHS);
2919 Rep = Builder.CreateBitCast(Rep, CI->getType());
2920 } else if (Name == "avx512.kxor.w") {
2921 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2922 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2923 Rep = Builder.CreateXor(LHS, RHS);
2924 Rep = Builder.CreateBitCast(Rep, CI->getType());
2925 } else if (Name == "avx512.kxnor.w") {
2926 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2927 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2928 LHS = Builder.CreateNot(LHS);
2929 Rep = Builder.CreateXor(LHS, RHS);
2930 Rep = Builder.CreateBitCast(Rep, CI->getType());
2931 } else if (Name == "avx512.knot.w") {
2932 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2933 Rep = Builder.CreateNot(Rep);
2934 Rep = Builder.CreateBitCast(Rep, CI->getType());
2935 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2936 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2937 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2938 Rep = Builder.CreateOr(LHS, RHS);
2939 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2940 Value *C;
2941 if (Name[14] == 'c')
2942 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2943 else
2944 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2945 Rep = Builder.CreateICmpEQ(Rep, C);
2946 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2947 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2948 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2949 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2950 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2951 Type *I32Ty = Type::getInt32Ty(C);
2952 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2953 ConstantInt::get(I32Ty, 0));
2954 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2955 ConstantInt::get(I32Ty, 0));
2956 Value *EltOp;
2957 if (Name.contains(".add."))
2958 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2959 else if (Name.contains(".sub."))
2960 EltOp = Builder.CreateFSub(Elt0, Elt1);
2961 else if (Name.contains(".mul."))
2962 EltOp = Builder.CreateFMul(Elt0, Elt1);
2963 else
2964 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2965 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2966 ConstantInt::get(I32Ty, 0));
2967 } else if (Name.starts_with("avx512.mask.pcmp")) {
2968 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2969 bool CmpEq = Name[16] == 'e';
2970 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2971 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2972 Type *OpTy = CI->getArgOperand(0)->getType();
2973 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2974 Intrinsic::ID IID;
2975 switch (VecWidth) {
2976 default:
2977 llvm_unreachable("Unexpected intrinsic");
2978 case 128:
2979 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2980 break;
2981 case 256:
2982 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2983 break;
2984 case 512:
2985 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2986 break;
2987 }
2988
2989 Rep =
2990 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2991 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2992 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2993 Type *OpTy = CI->getArgOperand(0)->getType();
2994 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2995 unsigned EltWidth = OpTy->getScalarSizeInBits();
2996 Intrinsic::ID IID;
2997 if (VecWidth == 128 && EltWidth == 32)
2998 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2999 else if (VecWidth == 256 && EltWidth == 32)
3000 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3001 else if (VecWidth == 512 && EltWidth == 32)
3002 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3003 else if (VecWidth == 128 && EltWidth == 64)
3004 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3005 else if (VecWidth == 256 && EltWidth == 64)
3006 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3007 else if (VecWidth == 512 && EltWidth == 64)
3008 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3009 else
3010 llvm_unreachable("Unexpected intrinsic");
3011
3012 Rep =
3013 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3014 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3015 } else if (Name.starts_with("avx512.cmp.p")) {
3016 SmallVector<Value *, 4> Args(CI->args());
3017 Type *OpTy = Args[0]->getType();
3018 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3019 unsigned EltWidth = OpTy->getScalarSizeInBits();
3020 Intrinsic::ID IID;
3021 if (VecWidth == 128 && EltWidth == 32)
3022 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3023 else if (VecWidth == 256 && EltWidth == 32)
3024 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3025 else if (VecWidth == 512 && EltWidth == 32)
3026 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3027 else if (VecWidth == 128 && EltWidth == 64)
3028 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3029 else if (VecWidth == 256 && EltWidth == 64)
3030 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3031 else if (VecWidth == 512 && EltWidth == 64)
3032 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3033 else
3034 llvm_unreachable("Unexpected intrinsic");
3035
3037 if (VecWidth == 512)
3038 std::swap(Mask, Args.back());
3039 Args.push_back(Mask);
3040
3041 Rep = Builder.CreateIntrinsic(IID, Args);
3042 } else if (Name.starts_with("avx512.mask.cmp.")) {
3043 // Integer compare intrinsics.
3044 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3045 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3046 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3047 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3048 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3049 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3050 Name.starts_with("avx512.cvtw2mask.") ||
3051 Name.starts_with("avx512.cvtd2mask.") ||
3052 Name.starts_with("avx512.cvtq2mask.")) {
3053 Value *Op = CI->getArgOperand(0);
3054 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3055 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3056 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3057 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3058 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3059 Name.starts_with("avx512.mask.pabs")) {
3060 Rep = upgradeAbs(Builder, *CI);
3061 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3062 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3063 Name.starts_with("avx512.mask.pmaxs")) {
3064 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3065 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3066 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3067 Name.starts_with("avx512.mask.pmaxu")) {
3068 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3069 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3070 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3071 Name.starts_with("avx512.mask.pmins")) {
3072 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3073 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3074 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3075 Name.starts_with("avx512.mask.pminu")) {
3076 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3077 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3078 Name == "avx512.pmulu.dq.512" ||
3079 Name.starts_with("avx512.mask.pmulu.dq.")) {
3080 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3081 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3082 Name == "avx512.pmul.dq.512" ||
3083 Name.starts_with("avx512.mask.pmul.dq.")) {
3084 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3085 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3086 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3087 Rep =
3088 Builder.CreateSIToFP(CI->getArgOperand(1),
3089 cast<VectorType>(CI->getType())->getElementType());
3090 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3091 } else if (Name == "avx512.cvtusi2sd") {
3092 Rep =
3093 Builder.CreateUIToFP(CI->getArgOperand(1),
3094 cast<VectorType>(CI->getType())->getElementType());
3095 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3096 } else if (Name == "sse2.cvtss2sd") {
3097 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3098 Rep = Builder.CreateFPExt(
3099 Rep, cast<VectorType>(CI->getType())->getElementType());
3100 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3101 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3102 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3103 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3104 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3105 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3106 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3107 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3108 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3109 Name == "avx512.mask.cvtqq2ps.256" ||
3110 Name == "avx512.mask.cvtqq2ps.512" ||
3111 Name == "avx512.mask.cvtuqq2ps.256" ||
3112 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3113 Name == "avx.cvt.ps2.pd.256" ||
3114 Name == "avx512.mask.cvtps2pd.128" ||
3115 Name == "avx512.mask.cvtps2pd.256") {
3116 auto *DstTy = cast<FixedVectorType>(CI->getType());
3117 Rep = CI->getArgOperand(0);
3118 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3119
3120 unsigned NumDstElts = DstTy->getNumElements();
3121 if (NumDstElts < SrcTy->getNumElements()) {
3122 assert(NumDstElts == 2 && "Unexpected vector size");
3123 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3124 }
3125
3126 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3127 bool IsUnsigned = Name.contains("cvtu");
3128 if (IsPS2PD)
3129 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3130 else if (CI->arg_size() == 4 &&
3131 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3132 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3133 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3134 : Intrinsic::x86_avx512_sitofp_round;
3135 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3136 {Rep, CI->getArgOperand(3)});
3137 } else {
3138 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3139 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3140 }
3141
3142 if (CI->arg_size() >= 3)
3143 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3144 CI->getArgOperand(1));
3145 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3146 Name.starts_with("vcvtph2ps.")) {
3147 auto *DstTy = cast<FixedVectorType>(CI->getType());
3148 Rep = CI->getArgOperand(0);
3149 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3150 unsigned NumDstElts = DstTy->getNumElements();
3151 if (NumDstElts != SrcTy->getNumElements()) {
3152 assert(NumDstElts == 4 && "Unexpected vector size");
3153 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3154 }
3155 Rep = Builder.CreateBitCast(
3156 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3157 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3158 if (CI->arg_size() >= 3)
3159 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3160 CI->getArgOperand(1));
3161 } else if (Name.starts_with("avx512.mask.load")) {
3162 // "avx512.mask.loadu." or "avx512.mask.load."
3163 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3164 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3165 CI->getArgOperand(2), Aligned);
3166 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3167 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3168 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3169 ResultTy->getNumElements());
3170
3171 Rep = Builder.CreateIntrinsic(
3172 Intrinsic::masked_expandload, ResultTy,
3173 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3174 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3175 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3176 Value *MaskVec =
3177 getX86MaskVec(Builder, CI->getArgOperand(2),
3178 cast<FixedVectorType>(ResultTy)->getNumElements());
3179
3180 Rep = Builder.CreateIntrinsic(
3181 Intrinsic::masked_compressstore, ResultTy,
3182 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3183 } else if (Name.starts_with("avx512.mask.compress.") ||
3184 Name.starts_with("avx512.mask.expand.")) {
3185 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3186
3187 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3188 ResultTy->getNumElements());
3189
3190 bool IsCompress = Name[12] == 'c';
3191 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3192 : Intrinsic::x86_avx512_mask_expand;
3193 Rep = Builder.CreateIntrinsic(
3194 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3195 } else if (Name.starts_with("xop.vpcom")) {
3196 bool IsSigned;
3197 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3198 Name.ends_with("uq"))
3199 IsSigned = false;
3200 else if (Name.ends_with("b") || Name.ends_with("w") ||
3201 Name.ends_with("d") || Name.ends_with("q"))
3202 IsSigned = true;
3203 else
3204 llvm_unreachable("Unknown suffix");
3205
3206 unsigned Imm;
3207 if (CI->arg_size() == 3) {
3208 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3209 } else {
3210 Name = Name.substr(9); // strip off "xop.vpcom"
3211 if (Name.starts_with("lt"))
3212 Imm = 0;
3213 else if (Name.starts_with("le"))
3214 Imm = 1;
3215 else if (Name.starts_with("gt"))
3216 Imm = 2;
3217 else if (Name.starts_with("ge"))
3218 Imm = 3;
3219 else if (Name.starts_with("eq"))
3220 Imm = 4;
3221 else if (Name.starts_with("ne"))
3222 Imm = 5;
3223 else if (Name.starts_with("false"))
3224 Imm = 6;
3225 else if (Name.starts_with("true"))
3226 Imm = 7;
3227 else
3228 llvm_unreachable("Unknown condition");
3229 }
3230
3231 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3232 } else if (Name.starts_with("xop.vpcmov")) {
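// XOP vpcmov is a per-bit select, (Op0 & Sel) | (Op1 & ~Sel), so it can be
// expressed with plain logic instructions instead of an intrinsic.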
3233 Value *Sel = CI->getArgOperand(2);
3234 Value *NotSel = Builder.CreateNot(Sel);
3235 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3236 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3237 Rep = Builder.CreateOr(Sel0, Sel1);
3238 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3239 Name.starts_with("avx512.mask.prol")) {
3240 Rep = upgradeX86Rotate(Builder, *CI, false);
3241 } else if (Name.starts_with("avx512.pror") ||
3242 Name.starts_with("avx512.mask.pror")) {
3243 Rep = upgradeX86Rotate(Builder, *CI, true);
3244 } else if (Name.starts_with("avx512.vpshld.") ||
3245 Name.starts_with("avx512.mask.vpshld") ||
3246 Name.starts_with("avx512.maskz.vpshld")) {
3247 bool ZeroMask = Name[11] == 'z';
3248 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3249 } else if (Name.starts_with("avx512.vpshrd.") ||
3250 Name.starts_with("avx512.mask.vpshrd") ||
3251 Name.starts_with("avx512.maskz.vpshrd")) {
3252 bool ZeroMask = Name[11] == 'z';
3253 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3254 } else if (Name == "sse42.crc32.64.8") {
3255 Value *Trunc0 =
3256 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3257 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3258 {Trunc0, CI->getArgOperand(1)});
3259 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3260 } else if (Name.starts_with("avx.vbroadcast.s") ||
3261 Name.starts_with("avx512.vbroadcast.s")) {
3262 // Replace broadcasts with a series of insertelements.
3263 auto *VecTy = cast<FixedVectorType>(CI->getType());
3264 Type *EltTy = VecTy->getElementType();
3265 unsigned EltNum = VecTy->getNumElements();
3266 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3267 Type *I32Ty = Type::getInt32Ty(C);
3268 Rep = PoisonValue::get(VecTy);
3269 for (unsigned I = 0; I < EltNum; ++I)
3270 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3271 } else if (Name.starts_with("sse41.pmovsx") ||
3272 Name.starts_with("sse41.pmovzx") ||
3273 Name.starts_with("avx2.pmovsx") ||
3274 Name.starts_with("avx2.pmovzx") ||
3275 Name.starts_with("avx512.mask.pmovsx") ||
3276 Name.starts_with("avx512.mask.pmovzx")) {
3277 auto *DstTy = cast<FixedVectorType>(CI->getType());
3278 unsigned NumDstElts = DstTy->getNumElements();
3279
3280 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3281 SmallVector<int, 8> ShuffleMask(NumDstElts);
3282 for (unsigned i = 0; i != NumDstElts; ++i)
3283 ShuffleMask[i] = i;
3284
3285 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3286
3287 bool DoSext = Name.contains("pmovsx");
3288 Rep =
3289 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3290 // If there are 3 arguments, it's a masked intrinsic, so we need a select.
3291 if (CI->arg_size() == 3)
3292 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3293 CI->getArgOperand(1));
3294 } else if (Name == "avx512.mask.pmov.qd.256" ||
3295 Name == "avx512.mask.pmov.qd.512" ||
3296 Name == "avx512.mask.pmov.wb.256" ||
3297 Name == "avx512.mask.pmov.wb.512") {
3298 Type *Ty = CI->getArgOperand(1)->getType();
3299 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3300 Rep =
3301 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3302 } else if (Name.starts_with("avx.vbroadcastf128") ||
3303 Name == "avx2.vbroadcasti128") {
3304 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3305 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3306 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3307 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3308 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3309 if (NumSrcElts == 2)
3310 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3311 else
3312 Rep = Builder.CreateShuffleVector(Load,
3313 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3314 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3315 Name.starts_with("avx512.mask.shuf.f")) {
3316 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3317 Type *VT = CI->getType();
3318 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3319 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3320 unsigned ControlBitsMask = NumLanes - 1;
3321 unsigned NumControlBits = NumLanes / 2;
3322 SmallVector<int, 8> ShuffleMask(0);
3323
3324 for (unsigned l = 0; l != NumLanes; ++l) {
3325 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3326 // We actually need the other source.
3327 if (l >= NumLanes / 2)
3328 LaneMask += NumLanes;
3329 for (unsigned i = 0; i != NumElementsInLane; ++i)
3330 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3331 }
3332 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3333 CI->getArgOperand(1), ShuffleMask);
3334 Rep =
3335 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3336 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3337 Name.starts_with("avx512.mask.broadcasti")) {
3338 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3339 ->getNumElements();
3340 unsigned NumDstElts =
3341 cast<FixedVectorType>(CI->getType())->getNumElements();
3342
3343 SmallVector<int, 8> ShuffleMask(NumDstElts);
3344 for (unsigned i = 0; i != NumDstElts; ++i)
3345 ShuffleMask[i] = i % NumSrcElts;
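// E.g. broadcasting a 4-element source into a 16-element destination uses
// the repeating mask <0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3>.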
3346
3347 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3348 CI->getArgOperand(0), ShuffleMask);
3349 Rep =
3350 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3351 } else if (Name.starts_with("avx2.pbroadcast") ||
3352 Name.starts_with("avx2.vbroadcast") ||
3353 Name.starts_with("avx512.pbroadcast") ||
3354 Name.starts_with("avx512.mask.broadcast.s")) {
3355 // Replace vp?broadcasts with a vector shuffle.
3356 Value *Op = CI->getArgOperand(0);
3357 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3358 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3359 SmallVector<int, 8> M;
3360 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3361 Rep = Builder.CreateShuffleVector(Op, M);
3362
3363 if (CI->arg_size() == 3)
3364 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3365 CI->getArgOperand(1));
3366 } else if (Name.starts_with("sse2.padds.") ||
3367 Name.starts_with("avx2.padds.") ||
3368 Name.starts_with("avx512.padds.") ||
3369 Name.starts_with("avx512.mask.padds.")) {
3370 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3371 } else if (Name.starts_with("sse2.psubs.") ||
3372 Name.starts_with("avx2.psubs.") ||
3373 Name.starts_with("avx512.psubs.") ||
3374 Name.starts_with("avx512.mask.psubs.")) {
3375 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3376 } else if (Name.starts_with("sse2.paddus.") ||
3377 Name.starts_with("avx2.paddus.") ||
3378 Name.starts_with("avx512.mask.paddus.")) {
3379 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3380 } else if (Name.starts_with("sse2.psubus.") ||
3381 Name.starts_with("avx2.psubus.") ||
3382 Name.starts_with("avx512.mask.psubus.")) {
3383 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3384 } else if (Name.starts_with("avx512.mask.palignr.")) {
3385 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3386 CI->getArgOperand(1), CI->getArgOperand(2),
3387 CI->getArgOperand(3), CI->getArgOperand(4),
3388 false);
3389 } else if (Name.starts_with("avx512.mask.valign.")) {
3390 Rep = upgradeX86ALIGNIntrinsics(
3391 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3392 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3393 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3394 // 128/256-bit shift left specified in bits.
3395 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3396 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3397 Shift / 8); // Shift is in bits.
3398 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3399 // 128/256-bit shift right specified in bits.
3400 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3401 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3402 Shift / 8); // Shift is in bits.
3403 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3404 Name == "avx512.psll.dq.512") {
3405 // 128/256/512-bit shift left specified in bytes.
3406 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3407 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3408 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3409 Name == "avx512.psrl.dq.512") {
3410 // 128/256/512-bit shift right specified in bytes.
3411 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3412 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3413 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3414 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3415 Name.starts_with("avx2.pblendd.")) {
3416 Value *Op0 = CI->getArgOperand(0);
3417 Value *Op1 = CI->getArgOperand(1);
3418 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3419 auto *VecTy = cast<FixedVectorType>(CI->getType());
3420 unsigned NumElts = VecTy->getNumElements();
3421
3422 SmallVector<int, 16> Idxs(NumElts);
3423 for (unsigned i = 0; i != NumElts; ++i)
3424 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
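// Each immediate bit selects between the two sources; e.g. pblendw with
// Imm = 0x0F on <8 x i16> operands yields Idxs = <8, 9, 10, 11, 4, 5, 6, 7>,
// i.e. elements 0-3 from Op1 and elements 4-7 from Op0.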
3425
3426 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3427 } else if (Name.starts_with("avx.vinsertf128.") ||
3428 Name == "avx2.vinserti128" ||
3429 Name.starts_with("avx512.mask.insert")) {
3430 Value *Op0 = CI->getArgOperand(0);
3431 Value *Op1 = CI->getArgOperand(1);
3432 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3433 unsigned DstNumElts =
3434 cast<FixedVectorType>(CI->getType())->getNumElements();
3435 unsigned SrcNumElts =
3436 cast<FixedVectorType>(Op1->getType())->getNumElements();
3437 unsigned Scale = DstNumElts / SrcNumElts;
3438
3439 // Mask off the high bits of the immediate value; hardware ignores those.
3440 Imm = Imm % Scale;
3441
3442 // Extend the second operand into a vector the size of the destination.
3443 SmallVector<int, 8> Idxs(DstNumElts);
3444 for (unsigned i = 0; i != SrcNumElts; ++i)
3445 Idxs[i] = i;
3446 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3447 Idxs[i] = SrcNumElts;
3448 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3449
3450 // Insert the second operand into the first operand.
3451
3452 // Note that there is no guarantee that instruction lowering will actually
3453 // produce a vinsertf128 instruction for the created shuffles. In
3454 // particular, the 0 immediate case involves no lane changes, so it can
3455 // be handled as a blend.
3456
3457 // Example of shuffle mask for 32-bit elements:
3458 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3459 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
3460
3461 // First fill with identity mask.
3462 for (unsigned i = 0; i != DstNumElts; ++i)
3463 Idxs[i] = i;
3464 // Then replace the elements where we need to insert.
3465 for (unsigned i = 0; i != SrcNumElts; ++i)
3466 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3467 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3468
3469 // If the intrinsic has a mask operand, handle that.
3470 if (CI->arg_size() == 5)
3471 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3472 CI->getArgOperand(3));
3473 } else if (Name.starts_with("avx.vextractf128.") ||
3474 Name == "avx2.vextracti128" ||
3475 Name.starts_with("avx512.mask.vextract")) {
3476 Value *Op0 = CI->getArgOperand(0);
3477 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3478 unsigned DstNumElts =
3479 cast<FixedVectorType>(CI->getType())->getNumElements();
3480 unsigned SrcNumElts =
3481 cast<FixedVectorType>(Op0->getType())->getNumElements();
3482 unsigned Scale = SrcNumElts / DstNumElts;
3483
3484 // Mask off the high bits of the immediate value; hardware ignores those.
3485 Imm = Imm % Scale;
3486
3487 // Get indexes for the subvector of the input vector.
3488 SmallVector<int, 8> Idxs(DstNumElts);
3489 for (unsigned i = 0; i != DstNumElts; ++i) {
3490 Idxs[i] = i + (Imm * DstNumElts);
3491 }
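// E.g. extracting the upper half (Imm = 1) of an <8 x float> source into a
// <4 x float> destination uses Idxs = <4, 5, 6, 7>.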
3492 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3493
3494 // If the intrinsic has a mask operand, handle that.
3495 if (CI->arg_size() == 4)
3496 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3497 CI->getArgOperand(2));
3498 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3499 Name.starts_with("avx512.mask.perm.di.")) {
3500 Value *Op0 = CI->getArgOperand(0);
3501 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3502 auto *VecTy = cast<FixedVectorType>(CI->getType());
3503 unsigned NumElts = VecTy->getNumElements();
3504
3505 SmallVector<int, 8> Idxs(NumElts);
3506 for (unsigned i = 0; i != NumElts; ++i)
3507 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
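// Each 2-bit immediate field selects an element within a 4-element group;
// e.g. Imm = 0x1B reverses each group: Idxs = <3, 2, 1, 0, 7, 6, 5, 4, ...>.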
3508
3509 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3510
3511 if (CI->arg_size() == 4)
3512 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3513 CI->getArgOperand(2));
3514 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3515 // The immediate permute control byte looks like this:
3516 // [1:0] - select 128 bits from sources for low half of destination
3517 // [2] - ignore
3518 // [3] - zero low half of destination
3519 // [5:4] - select 128 bits from sources for high half of destination
3520 // [6] - ignore
3521 // [7] - zero high half of destination
3522
3523 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3524
3525 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3526 unsigned HalfSize = NumElts / 2;
3527 SmallVector<int, 8> ShuffleMask(NumElts);
3528
3529 // Determine which operand(s) are actually in use for this instruction.
3530 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3531 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3532
3533 // If needed, replace operands based on zero mask.
3534 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3535 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3536
3537 // Permute low half of result.
3538 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3539 for (unsigned i = 0; i < HalfSize; ++i)
3540 ShuffleMask[i] = StartIndex + i;
3541
3542 // Permute high half of result.
3543 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3544 for (unsigned i = 0; i < HalfSize; ++i)
3545 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
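// E.g. Imm = 0x21 takes the high half of the first source for the low half
// of the result and the low half of the second source for the high half,
// the classic "swap halves" pattern.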
3546
3547 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3548
3549 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3550 Name.starts_with("avx512.mask.vpermil.p") ||
3551 Name.starts_with("avx512.mask.pshuf.d.")) {
3552 Value *Op0 = CI->getArgOperand(0);
3553 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3554 auto *VecTy = cast<FixedVectorType>(CI->getType());
3555 unsigned NumElts = VecTy->getNumElements();
3556 // Calculate the size of each index in the immediate.
3557 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3558 unsigned IdxMask = ((1 << IdxSize) - 1);
3559
3560 SmallVector<int, 8> Idxs(NumElts);
3561 // Look up the bits for this element, wrapping around the immediate every
3562 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3563 // to offset by the first index of each group.
3564 for (unsigned i = 0; i != NumElts; ++i)
3565 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
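// E.g. sse2.pshuf.d with Imm = 0x1B on a <4 x i32> source produces
// Idxs = <3, 2, 1, 0>, reversing the vector.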
3566
3567 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3568
3569 if (CI->arg_size() == 4)
3570 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3571 CI->getArgOperand(2));
3572 } else if (Name == "sse2.pshufl.w" ||
3573 Name.starts_with("avx512.mask.pshufl.w.")) {
3574 Value *Op0 = CI->getArgOperand(0);
3575 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3576 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3577
3578 SmallVector<int, 16> Idxs(NumElts);
3579 for (unsigned l = 0; l != NumElts; l += 8) {
3580 for (unsigned i = 0; i != 4; ++i)
3581 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3582 for (unsigned i = 4; i != 8; ++i)
3583 Idxs[i + l] = i + l;
3584 }
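// E.g. Imm = 0x1B reverses the low four words of each 128-bit lane and
// leaves the high four in place: Idxs = <3, 2, 1, 0, 4, 5, 6, 7, ...>.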
3585
3586 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3587
3588 if (CI->arg_size() == 4)
3589 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3590 CI->getArgOperand(2));
3591 } else if (Name == "sse2.pshufh.w" ||
3592 Name.starts_with("avx512.mask.pshufh.w.")) {
3593 Value *Op0 = CI->getArgOperand(0);
3594 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3595 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3596
3597 SmallVector<int, 16> Idxs(NumElts);
3598 for (unsigned l = 0; l != NumElts; l += 8) {
3599 for (unsigned i = 0; i != 4; ++i)
3600 Idxs[i + l] = i + l;
3601 for (unsigned i = 0; i != 4; ++i)
3602 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3603 }
3604
3605 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3606
3607 if (CI->arg_size() == 4)
3608 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3609 CI->getArgOperand(2));
3610 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3611 Value *Op0 = CI->getArgOperand(0);
3612 Value *Op1 = CI->getArgOperand(1);
3613 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3614 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3615
3616 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3617 unsigned HalfLaneElts = NumLaneElts / 2;
3618
3619 SmallVector<int, 16> Idxs(NumElts);
3620 for (unsigned i = 0; i != NumElts; ++i) {
3621 // Base index is the starting element of the lane.
3622 Idxs[i] = i - (i % NumLaneElts);
3623 // If we are halfway through the lane, switch to the other source.
3624 if ((i % NumLaneElts) >= HalfLaneElts)
3625 Idxs[i] += NumElts;
3626 // Now select the specific element by adding HalfLaneElts bits from the
3627 // immediate, wrapping around the immediate every 8 bits.
3628 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3629 }
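// E.g. shufps on <4 x float> operands with Imm = 0x44 yields
// Idxs = <0, 1, 4, 5>: two elements picked from Op0 for the low half and
// two from Op1 for the high half.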
3630
3631 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3632
3633 Rep =
3634 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3635 } else if (Name.starts_with("avx512.mask.movddup") ||
3636 Name.starts_with("avx512.mask.movshdup") ||
3637 Name.starts_with("avx512.mask.movsldup")) {
3638 Value *Op0 = CI->getArgOperand(0);
3639 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3640 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3641
3642 unsigned Offset = 0;
3643 if (Name.starts_with("avx512.mask.movshdup."))
3644 Offset = 1;
3645
3646 SmallVector<int, 16> Idxs(NumElts);
3647 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3648 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3649 Idxs[i + l + 0] = i + l + Offset;
3650 Idxs[i + l + 1] = i + l + Offset;
3651 }
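// E.g. movsldup duplicates the even elements (Idxs = <0, 0, 2, 2, ...>) and
// movshdup the odd ones (Idxs = <1, 1, 3, 3, ...>); movddup does the same
// for 64-bit elements.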
3652
3653 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3654
3655 Rep =
3656 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3657 } else if (Name.starts_with("avx512.mask.punpckl") ||
3658 Name.starts_with("avx512.mask.unpckl.")) {
3659 Value *Op0 = CI->getArgOperand(0);
3660 Value *Op1 = CI->getArgOperand(1);
3661 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3662 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3663
3664 SmallVector<int, 64> Idxs(NumElts);
3665 for (int l = 0; l != NumElts; l += NumLaneElts)
3666 for (int i = 0; i != NumLaneElts; ++i)
3667 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
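// E.g. for <4 x i32> operands this is the unpcklps pattern
// Idxs = <0, 4, 1, 5>, interleaving the low halves of each lane.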
3668
3669 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3670
3671 Rep =
3672 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3673 } else if (Name.starts_with("avx512.mask.punpckh") ||
3674 Name.starts_with("avx512.mask.unpckh.")) {
3675 Value *Op0 = CI->getArgOperand(0);
3676 Value *Op1 = CI->getArgOperand(1);
3677 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3678 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3679
3680 SmallVector<int, 64> Idxs(NumElts);
3681 for (int l = 0; l != NumElts; l += NumLaneElts)
3682 for (int i = 0; i != NumLaneElts; ++i)
3683 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
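// E.g. for <4 x i32> operands this is the unpckhps pattern
// Idxs = <2, 6, 3, 7>, interleaving the high halves of each lane.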
3684
3685 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3686
3687 Rep =
3688 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3689 } else if (Name.starts_with("avx512.mask.and.") ||
3690 Name.starts_with("avx512.mask.pand.")) {
3691 VectorType *FTy = cast<VectorType>(CI->getType());
3692 VectorType *ITy = VectorType::getInteger(FTy);
3693 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3694 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3695 Rep = Builder.CreateBitCast(Rep, FTy);
3696 Rep =
3697 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3698 } else if (Name.starts_with("avx512.mask.andn.") ||
3699 Name.starts_with("avx512.mask.pandn.")) {
3700 VectorType *FTy = cast<VectorType>(CI->getType());
3701 VectorType *ITy = VectorType::getInteger(FTy);
3702 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3703 Rep = Builder.CreateAnd(Rep,
3704 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3705 Rep = Builder.CreateBitCast(Rep, FTy);
3706 Rep =
3707 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3708 } else if (Name.starts_with("avx512.mask.or.") ||
3709 Name.starts_with("avx512.mask.por.")) {
3710 VectorType *FTy = cast<VectorType>(CI->getType());
3711 VectorType *ITy = VectorType::getInteger(FTy);
3712 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3713 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3714 Rep = Builder.CreateBitCast(Rep, FTy);
3715 Rep =
3716 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3717 } else if (Name.starts_with("avx512.mask.xor.") ||
3718 Name.starts_with("avx512.mask.pxor.")) {
3719 VectorType *FTy = cast<VectorType>(CI->getType());
3720 VectorType *ITy = VectorType::getInteger(FTy);
3721 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3722 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3723 Rep = Builder.CreateBitCast(Rep, FTy);
3724 Rep =
3725 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3726 } else if (Name.starts_with("avx512.mask.padd.")) {
3727 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3728 Rep =
3729 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3730 } else if (Name.starts_with("avx512.mask.psub.")) {
3731 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3732 Rep =
3733 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3734 } else if (Name.starts_with("avx512.mask.pmull.")) {
3735 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3736 Rep =
3737 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3738 } else if (Name.starts_with("avx512.mask.add.p")) {
3739 if (Name.ends_with(".512")) {
3740 Intrinsic::ID IID;
3741 if (Name[17] == 's')
3742 IID = Intrinsic::x86_avx512_add_ps_512;
3743 else
3744 IID = Intrinsic::x86_avx512_add_pd_512;
3745
3746 Rep = Builder.CreateIntrinsic(
3747 IID,
3748 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3749 } else {
3750 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3751 }
3752 Rep =
3753 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3754 } else if (Name.starts_with("avx512.mask.div.p")) {
3755 if (Name.ends_with(".512")) {
3756 Intrinsic::ID IID;
3757 if (Name[17] == 's')
3758 IID = Intrinsic::x86_avx512_div_ps_512;
3759 else
3760 IID = Intrinsic::x86_avx512_div_pd_512;
3761
3762 Rep = Builder.CreateIntrinsic(
3763 IID,
3764 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3765 } else {
3766 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3767 }
3768 Rep =
3769 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3770 } else if (Name.starts_with("avx512.mask.mul.p")) {
3771 if (Name.ends_with(".512")) {
3772 Intrinsic::ID IID;
3773 if (Name[17] == 's')
3774 IID = Intrinsic::x86_avx512_mul_ps_512;
3775 else
3776 IID = Intrinsic::x86_avx512_mul_pd_512;
3777
3778 Rep = Builder.CreateIntrinsic(
3779 IID,
3780 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3781 } else {
3782 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3783 }
3784 Rep =
3785 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3786 } else if (Name.starts_with("avx512.mask.sub.p")) {
3787 if (Name.ends_with(".512")) {
3788 Intrinsic::ID IID;
3789 if (Name[17] == 's')
3790 IID = Intrinsic::x86_avx512_sub_ps_512;
3791 else
3792 IID = Intrinsic::x86_avx512_sub_pd_512;
3793
3794 Rep = Builder.CreateIntrinsic(
3795 IID,
3796 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3797 } else {
3798 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3799 }
3800 Rep =
3801 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3802 } else if ((Name.starts_with("avx512.mask.max.p") ||
3803 Name.starts_with("avx512.mask.min.p")) &&
3804 Name.drop_front(18) == ".512") {
3805 bool IsDouble = Name[17] == 'd';
3806 bool IsMin = Name[13] == 'i';
3807 static const Intrinsic::ID MinMaxTbl[2][2] = {
3808 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3809 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3810 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3811
3812 Rep = Builder.CreateIntrinsic(
3813 IID,
3814 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3815 Rep =
3816 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3817 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3818 Rep =
3819 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3820 {CI->getArgOperand(0), Builder.getInt1(false)});
3821 Rep =
3822 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3823 } else if (Name.starts_with("avx512.mask.psll")) {
3824 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3825 bool IsVariable = Name[16] == 'v';
3826 char Size = Name[16] == '.' ? Name[17]
3827 : Name[17] == '.' ? Name[18]
3828 : Name[18] == '.' ? Name[19]
3829 : Name[20];
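// The element-size character follows the optional 'i'/'v' marker and element
// count; e.g. "avx512.mask.psll.d.128" gives Size = 'd' (Name[16] is '.'),
// while "avx512.mask.psllv8.hi" gives Size = 'h' (Name[18] is '.').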
3830
3831 Intrinsic::ID IID;
3832 if (IsVariable && Name[17] != '.') {
3833 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3834 IID = Intrinsic::x86_avx2_psllv_q;
3835 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3836 IID = Intrinsic::x86_avx2_psllv_q_256;
3837 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3838 IID = Intrinsic::x86_avx2_psllv_d;
3839 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3840 IID = Intrinsic::x86_avx2_psllv_d_256;
3841 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3842 IID = Intrinsic::x86_avx512_psllv_w_128;
3843 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3844 IID = Intrinsic::x86_avx512_psllv_w_256;
3845 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3846 IID = Intrinsic::x86_avx512_psllv_w_512;
3847 else
3848 llvm_unreachable("Unexpected size");
3849 } else if (Name.ends_with(".128")) {
3850 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3851 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3852 : Intrinsic::x86_sse2_psll_d;
3853 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3854 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3855 : Intrinsic::x86_sse2_psll_q;
3856 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3857 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3858 : Intrinsic::x86_sse2_psll_w;
3859 else
3860 llvm_unreachable("Unexpected size");
3861 } else if (Name.ends_with(".256")) {
3862 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3863 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3864 : Intrinsic::x86_avx2_psll_d;
3865 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3866 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3867 : Intrinsic::x86_avx2_psll_q;
3868 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3869 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3870 : Intrinsic::x86_avx2_psll_w;
3871 else
3872 llvm_unreachable("Unexpected size");
3873 } else {
3874 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3875 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3876 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3877 : Intrinsic::x86_avx512_psll_d_512;
3878 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3879 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3880 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3881 : Intrinsic::x86_avx512_psll_q_512;
3882 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3883 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3884 : Intrinsic::x86_avx512_psll_w_512;
3885 else
3886 llvm_unreachable("Unexpected size");
3887 }
3888
3889 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3890 } else if (Name.starts_with("avx512.mask.psrl")) {
3891 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3892 bool IsVariable = Name[16] == 'v';
3893 char Size = Name[16] == '.' ? Name[17]
3894 : Name[17] == '.' ? Name[18]
3895 : Name[18] == '.' ? Name[19]
3896 : Name[20];
3897
3898 Intrinsic::ID IID;
3899 if (IsVariable && Name[17] != '.') {
3900 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3901 IID = Intrinsic::x86_avx2_psrlv_q;
3902 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3903 IID = Intrinsic::x86_avx2_psrlv_q_256;
3904 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3905 IID = Intrinsic::x86_avx2_psrlv_d;
3906 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3907 IID = Intrinsic::x86_avx2_psrlv_d_256;
3908 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3909 IID = Intrinsic::x86_avx512_psrlv_w_128;
3910 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3911 IID = Intrinsic::x86_avx512_psrlv_w_256;
3912 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3913 IID = Intrinsic::x86_avx512_psrlv_w_512;
3914 else
3915 llvm_unreachable("Unexpected size");
3916 } else if (Name.ends_with(".128")) {
3917 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3918 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3919 : Intrinsic::x86_sse2_psrl_d;
3920 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3921 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3922 : Intrinsic::x86_sse2_psrl_q;
3923 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3924 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3925 : Intrinsic::x86_sse2_psrl_w;
3926 else
3927 llvm_unreachable("Unexpected size");
3928 } else if (Name.ends_with(".256")) {
3929 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3930 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3931 : Intrinsic::x86_avx2_psrl_d;
3932 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3933 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3934 : Intrinsic::x86_avx2_psrl_q;
3935 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3936 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3937 : Intrinsic::x86_avx2_psrl_w;
3938 else
3939 llvm_unreachable("Unexpected size");
3940 } else {
3941 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
3942 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3943 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3944 : Intrinsic::x86_avx512_psrl_d_512;
3945 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
3946 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3947 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3948 : Intrinsic::x86_avx512_psrl_q_512;
3949 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3950 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3951 : Intrinsic::x86_avx512_psrl_w_512;
3952 else
3953 llvm_unreachable("Unexpected size");
3954 }
3955
3956 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3957 } else if (Name.starts_with("avx512.mask.psra")) {
3958 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3959 bool IsVariable = Name[16] == 'v';
3960 char Size = Name[16] == '.' ? Name[17]
3961 : Name[17] == '.' ? Name[18]
3962 : Name[18] == '.' ? Name[19]
3963 : Name[20];
3964
3965 Intrinsic::ID IID;
3966 if (IsVariable && Name[17] != '.') {
3967 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3968 IID = Intrinsic::x86_avx2_psrav_d;
3969 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3970 IID = Intrinsic::x86_avx2_psrav_d_256;
3971 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3972 IID = Intrinsic::x86_avx512_psrav_w_128;
3973 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3974 IID = Intrinsic::x86_avx512_psrav_w_256;
3975 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3976 IID = Intrinsic::x86_avx512_psrav_w_512;
3977 else
3978 llvm_unreachable("Unexpected size");
3979 } else if (Name.ends_with(".128")) {
3980 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3981 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3982 : Intrinsic::x86_sse2_psra_d;
3983 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3984 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3985 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3986 : Intrinsic::x86_avx512_psra_q_128;
3987 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3988 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3989 : Intrinsic::x86_sse2_psra_w;
3990 else
3991 llvm_unreachable("Unexpected size");
3992 } else if (Name.ends_with(".256")) {
3993 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3994 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3995 : Intrinsic::x86_avx2_psra_d;
3996 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3997 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3998 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3999 : Intrinsic::x86_avx512_psra_q_256;
4000 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4001 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4002 : Intrinsic::x86_avx2_psra_w;
4003 else
4004 llvm_unreachable("Unexpected size");
4005 } else {
4006 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4007 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4008 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4009 : Intrinsic::x86_avx512_psra_d_512;
4010 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q, psrav.q.512
4011 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4012 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4013 : Intrinsic::x86_avx512_psra_q_512;
4014 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4015 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4016 : Intrinsic::x86_avx512_psra_w_512;
4017 else
4018 llvm_unreachable("Unexpected size");
4019 }
4020
4021 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4022 } else if (Name.starts_with("avx512.mask.move.s")) {
4023 Rep = upgradeMaskedMove(Builder, *CI);
4024 } else if (Name.starts_with("avx512.cvtmask2")) {
4025 Rep = upgradeMaskToInt(Builder, *CI);
4026 } else if (Name.ends_with(".movntdqa")) {
4027 MDNode *Node = MDNode::get(
4028 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4029
4030 LoadInst *LI = Builder.CreateAlignedLoad(
4031 CI->getType(), CI->getArgOperand(0),
4032 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
4033 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4034 Rep = LI;
4035 } else if (Name.starts_with("fma.vfmadd.") ||
4036 Name.starts_with("fma.vfmsub.") ||
4037 Name.starts_with("fma.vfnmadd.") ||
4038 Name.starts_with("fma.vfnmsub.")) {
4039 bool NegMul = Name[6] == 'n';
4040 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4041 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4042
4043 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4044 CI->getArgOperand(2)};
4045
4046 if (IsScalar) {
4047 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4048 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4049 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4050 }
4051
4052 if (NegMul && !IsScalar)
4053 Ops[0] = Builder.CreateFNeg(Ops[0]);
4054 if (NegMul && IsScalar)
4055 Ops[1] = Builder.CreateFNeg(Ops[1]);
4056 if (NegAcc)
4057 Ops[2] = Builder.CreateFNeg(Ops[2]);
4058
4059 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4060
4061 if (IsScalar)
4062 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4063 } else if (Name.starts_with("fma4.vfmadd.s")) {
4064 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4065 CI->getArgOperand(2)};
4066
4067 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4068 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4069 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4070
4071 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4072
4073 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4074 Rep, (uint64_t)0);
4075 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4076 Name.starts_with("avx512.maskz.vfmadd.s") ||
4077 Name.starts_with("avx512.mask3.vfmadd.s") ||
4078 Name.starts_with("avx512.mask3.vfmsub.s") ||
4079 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4080 bool IsMask3 = Name[11] == '3';
4081 bool IsMaskZ = Name[11] == 'z';
4082 // Drop the "avx512.mask." prefix to make the suffix checks easier.
4083 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4084 bool NegMul = Name[2] == 'n';
4085 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4086
4087 Value *A = CI->getArgOperand(0);
4088 Value *B = CI->getArgOperand(1);
4089 Value *C = CI->getArgOperand(2);
4090
4091 if (NegMul && (IsMask3 || IsMaskZ))
4092 A = Builder.CreateFNeg(A);
4093 if (NegMul && !(IsMask3 || IsMaskZ))
4094 B = Builder.CreateFNeg(B);
4095 if (NegAcc)
4096 C = Builder.CreateFNeg(C);
4097
4098 A = Builder.CreateExtractElement(A, (uint64_t)0);
4099 B = Builder.CreateExtractElement(B, (uint64_t)0);
4100 C = Builder.CreateExtractElement(C, (uint64_t)0);
4101
4102 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4103 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4104 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4105
4106 Intrinsic::ID IID;
4107 if (Name.back() == 'd')
4108 IID = Intrinsic::x86_avx512_vfmadd_f64;
4109 else
4110 IID = Intrinsic::x86_avx512_vfmadd_f32;
4111 Rep = Builder.CreateIntrinsic(IID, Ops);
4112 } else {
4113 Rep = Builder.CreateFMA(A, B, C);
4114 }
4115
4116 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4117 : IsMask3 ? C
4118 : A;
4119
4120 // For Mask3 with NegAcc, we need to create a new extractelement that
4121 // avoids the negation above.
4122 if (NegAcc && IsMask3)
4123 PassThru =
4124 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4125
4126 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4127 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4128 (uint64_t)0);
4129 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4130 Name.starts_with("avx512.mask.vfnmadd.p") ||
4131 Name.starts_with("avx512.mask.vfnmsub.p") ||
4132 Name.starts_with("avx512.mask3.vfmadd.p") ||
4133 Name.starts_with("avx512.mask3.vfmsub.p") ||
4134 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4135 Name.starts_with("avx512.maskz.vfmadd.p")) {
4136 bool IsMask3 = Name[11] == '3';
4137 bool IsMaskZ = Name[11] == 'z';
4138 // Drop the "avx512.mask." prefix to make the suffix checks easier.
4139 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4140 bool NegMul = Name[2] == 'n';
4141 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4142
4143 Value *A = CI->getArgOperand(0);
4144 Value *B = CI->getArgOperand(1);
4145 Value *C = CI->getArgOperand(2);
4146
4147 if (NegMul && (IsMask3 || IsMaskZ))
4148 A = Builder.CreateFNeg(A);
4149 if (NegMul && !(IsMask3 || IsMaskZ))
4150 B = Builder.CreateFNeg(B);
4151 if (NegAcc)
4152 C = Builder.CreateFNeg(C);
4153
4154 if (CI->arg_size() == 5 &&
4155 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4156 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4157 Intrinsic::ID IID;
4158 // Check the character before ".512" in the string.
4159 if (Name[Name.size() - 5] == 's')
4160 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4161 else
4162 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4163
4164 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4165 } else {
4166 Rep = Builder.CreateFMA(A, B, C);
4167 }
4168
4169 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4170 : IsMask3 ? CI->getArgOperand(2)
4171 : CI->getArgOperand(0);
4172
4173 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4174 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4175 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4176 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4177 Intrinsic::ID IID;
4178 if (VecWidth == 128 && EltWidth == 32)
4179 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4180 else if (VecWidth == 256 && EltWidth == 32)
4181 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4182 else if (VecWidth == 128 && EltWidth == 64)
4183 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4184 else if (VecWidth == 256 && EltWidth == 64)
4185 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4186 else
4187 llvm_unreachable("Unexpected intrinsic");
4188
4189 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4190 CI->getArgOperand(2)};
4191 Ops[2] = Builder.CreateFNeg(Ops[2]);
4192 Rep = Builder.CreateIntrinsic(IID, Ops);
4193 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4194 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4195 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4196 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4197 bool IsMask3 = Name[11] == '3';
4198 bool IsMaskZ = Name[11] == 'z';
4199 // Drop the "avx512.mask." prefix to make the suffix checks easier.
4200 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4201 bool IsSubAdd = Name[3] == 's';
4202 if (CI->arg_size() == 5) {
4203 Intrinsic::ID IID;
4204 // Check the character before ".512" in the string.
4205 if (Name[Name.size() - 5] == 's')
4206 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4207 else
4208 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4209
4210 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4211 CI->getArgOperand(2), CI->getArgOperand(4)};
4212 if (IsSubAdd)
4213 Ops[2] = Builder.CreateFNeg(Ops[2]);
4214
4215 Rep = Builder.CreateIntrinsic(IID, Ops);
4216 } else {
4217 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4218
4219 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4220 CI->getArgOperand(2)};
4221
4222 Function *FMA = Intrinsic::getOrInsertDeclaration(
4223 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4224 Value *Odd = Builder.CreateCall(FMA, Ops);
4225 Ops[2] = Builder.CreateFNeg(Ops[2]);
4226 Value *Even = Builder.CreateCall(FMA, Ops);
4227
4228 if (IsSubAdd)
4229 std::swap(Even, Odd);
4230
4231 SmallVector<int, 32> Idxs(NumElts);
4232 for (int i = 0; i != NumElts; ++i)
4233 Idxs[i] = i + (i % 2) * NumElts;
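// Interleave the two FMA results: even destination elements take the
// subtracted ("Even") lanes and odd elements the added ("Odd") lanes;
// e.g. for 4 elements the shuffle mask is <0, 5, 2, 7>.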
4234
4235 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4236 }
4237
4238 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4239 : IsMask3 ? CI->getArgOperand(2)
4240 : CI->getArgOperand(0);
4241
4242 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4243 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4244 Name.starts_with("avx512.maskz.pternlog.")) {
4245 bool ZeroMask = Name[11] == 'z';
4246 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4247 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4248 Intrinsic::ID IID;
4249 if (VecWidth == 128 && EltWidth == 32)
4250 IID = Intrinsic::x86_avx512_pternlog_d_128;
4251 else if (VecWidth == 256 && EltWidth == 32)
4252 IID = Intrinsic::x86_avx512_pternlog_d_256;
4253 else if (VecWidth == 512 && EltWidth == 32)
4254 IID = Intrinsic::x86_avx512_pternlog_d_512;
4255 else if (VecWidth == 128 && EltWidth == 64)
4256 IID = Intrinsic::x86_avx512_pternlog_q_128;
4257 else if (VecWidth == 256 && EltWidth == 64)
4258 IID = Intrinsic::x86_avx512_pternlog_q_256;
4259 else if (VecWidth == 512 && EltWidth == 64)
4260 IID = Intrinsic::x86_avx512_pternlog_q_512;
4261 else
4262 llvm_unreachable("Unexpected intrinsic");
4263
4264 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4265 CI->getArgOperand(2), CI->getArgOperand(3)};
4266 Rep = Builder.CreateIntrinsic(IID, Args);
4267 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4268 : CI->getArgOperand(0);
4269 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4270 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4271 Name.starts_with("avx512.maskz.vpmadd52")) {
4272 bool ZeroMask = Name[11] == 'z';
4273 bool High = Name[20] == 'h' || Name[21] == 'h';
4274 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4275 Intrinsic::ID IID;
4276 if (VecWidth == 128 && !High)
4277 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4278 else if (VecWidth == 256 && !High)
4279 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4280 else if (VecWidth == 512 && !High)
4281 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4282 else if (VecWidth == 128 && High)
4283 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4284 else if (VecWidth == 256 && High)
4285 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4286 else if (VecWidth == 512 && High)
4287 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4288 else
4289 llvm_unreachable("Unexpected intrinsic");
4290
4291 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4292 CI->getArgOperand(2)};
4293 Rep = Builder.CreateIntrinsic(IID, Args);
4294 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4295 : CI->getArgOperand(0);
4296 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4297 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4298 Name.starts_with("avx512.mask.vpermt2var.") ||
4299 Name.starts_with("avx512.maskz.vpermt2var.")) {
4300 bool ZeroMask = Name[11] == 'z';
4301 bool IndexForm = Name[17] == 'i';
4302 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4303 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4304 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4305 Name.starts_with("avx512.mask.vpdpbusds.") ||
4306 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4307 bool ZeroMask = Name[11] == 'z';
4308 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4309 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4310 Intrinsic::ID IID;
4311 if (VecWidth == 128 && !IsSaturating)
4312 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4313 else if (VecWidth == 256 && !IsSaturating)
4314 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4315 else if (VecWidth == 512 && !IsSaturating)
4316 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4317 else if (VecWidth == 128 && IsSaturating)
4318 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4319 else if (VecWidth == 256 && IsSaturating)
4320 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4321 else if (VecWidth == 512 && IsSaturating)
4322 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4323 else
4324 llvm_unreachable("Unexpected intrinsic");
4325
4326 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4327 CI->getArgOperand(2)};
4328
4329 // Input argument types were incorrectly set to vectors of i32 before, but
4330 // they should be vectors of i8. Insert a bitcast when encountering the old
4331 // types.
4332 if (Args[1]->getType()->isVectorTy() &&
4333 cast<VectorType>(Args[1]->getType())
4334 ->getElementType()
4335 ->isIntegerTy(32) &&
4336 Args[2]->getType()->isVectorTy() &&
4337 cast<VectorType>(Args[2]->getType())
4338 ->getElementType()
4339 ->isIntegerTy(32)) {
4340 Type *NewArgType = nullptr;
4341 if (VecWidth == 128)
4342 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4343 else if (VecWidth == 256)
4344 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4345 else if (VecWidth == 512)
4346 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4347 else
4348 llvm_unreachable("Unexpected vector bit width");
4349
4350 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4351 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4352 }
4353
4354 Rep = Builder.CreateIntrinsic(IID, Args);
4355 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4356 : CI->getArgOperand(0);
4357 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4358 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4359 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4360 Name.starts_with("avx512.mask.vpdpwssds.") ||
4361 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4362 bool ZeroMask = Name[11] == 'z';
4363 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4364 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4365 Intrinsic::ID IID;
4366 if (VecWidth == 128 && !IsSaturating)
4367 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4368 else if (VecWidth == 256 && !IsSaturating)
4369 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4370 else if (VecWidth == 512 && !IsSaturating)
4371 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4372 else if (VecWidth == 128 && IsSaturating)
4373 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4374 else if (VecWidth == 256 && IsSaturating)
4375 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4376 else if (VecWidth == 512 && IsSaturating)
4377 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4378 else
4379 llvm_unreachable("Unexpected intrinsic");
4380
4381 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4382 CI->getArgOperand(2)};
4383
4384 // Input argument types were incorrectly set to vectors of i32 before, but
4385 // they should be vectors of i16. Insert bitcasts when encountering the old
4386 // types.
4387 if (Args[1]->getType()->isVectorTy() &&
4388 cast<VectorType>(Args[1]->getType())
4389 ->getElementType()
4390 ->isIntegerTy(32) &&
4391 Args[2]->getType()->isVectorTy() &&
4392 cast<VectorType>(Args[2]->getType())
4393 ->getElementType()
4394 ->isIntegerTy(32)) {
4395 Type *NewArgType = nullptr;
4396 if (VecWidth == 128)
4397 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4398 else if (VecWidth == 256)
4399 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4400 else if (VecWidth == 512)
4401 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4402 else
4403 llvm_unreachable("Unexpected vector bit width");
4404
4405 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4406 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4407 }
4408
4409 Rep = Builder.CreateIntrinsic(IID, Args);
4410 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4411 : CI->getArgOperand(0);
4412 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4413 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4414 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4415 Name == "subborrow.u32" || Name == "subborrow.u64") {
4416 Intrinsic::ID IID;
4417 if (Name[0] == 'a' && Name.back() == '2')
4418 IID = Intrinsic::x86_addcarry_32;
4419 else if (Name[0] == 'a' && Name.back() == '4')
4420 IID = Intrinsic::x86_addcarry_64;
4421 else if (Name[0] == 's' && Name.back() == '2')
4422 IID = Intrinsic::x86_subborrow_32;
4423 else if (Name[0] == 's' && Name.back() == '4')
4424 IID = Intrinsic::x86_subborrow_64;
4425 else
4426 llvm_unreachable("Unexpected intrinsic");
4427
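// For illustration (a sketch; value names are hypothetical):
//   %r = call i8 @llvm.x86.addcarry.u32(i8 %c, i32 %a, i32 %b, ptr %p)
// becomes
//   %pair = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %a, i32 %b)
//   %sum = extractvalue { i8, i32 } %pair, 1
//   store i32 %sum, ptr %p, align 1
//   %cf = extractvalue { i8, i32 } %pair, 0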
4428 // Make a call with 3 operands.
4429 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4430 CI->getArgOperand(2)};
4431 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4432
4433 // Extract the second result and store it.
4434 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4435 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4436 // Replace the original call result with the first result of the new call.
4437 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4438
4439 CI->replaceAllUsesWith(CF);
4440 Rep = nullptr;
4441 } else if (Name.starts_with("avx512.mask.") &&
4442 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4443 // Rep will be updated by the call in the condition.
4444 }
4445
4446 return Rep;
4447}
4448
4449static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4450 Function *F, IRBuilder<> &Builder) {
4451 if (Name.starts_with("neon.bfcvt")) {
4452 if (Name.starts_with("neon.bfcvtn2")) {
4453 SmallVector<int, 32> LoMask(4);
4454 std::iota(LoMask.begin(), LoMask.end(), 0);
4455 SmallVector<int, 32> ConcatMask(8);
4456 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4457 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4458 Value *Trunc =
4459 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4460 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4461 } else if (Name.starts_with("neon.bfcvtn")) {
4462 SmallVector<int, 32> ConcatMask(8);
4463 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4464 Type *V4BF16 =
4465 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4466 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4468 return Builder.CreateShuffleVector(
4469 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4470 } else {
4471 return Builder.CreateFPTrunc(CI->getOperand(0),
4472 Type::getBFloatTy(F->getContext()));
4473 }
4474 } else if (Name.starts_with("sve.fcvt")) {
4475 Intrinsic::ID NewID =
4476 StringSwitch<Intrinsic::ID>(Name)
4477 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4478 .Case("sve.fcvtnt.bf16f32",
4479 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4480 .Default(Intrinsic::not_intrinsic);
4481 if (NewID == Intrinsic::not_intrinsic)
4482 llvm_unreachable("Unhandled Intrinsic!");
4483
4484 SmallVector<Value *, 3> Args(CI->args());
4485
4486 // The original intrinsics incorrectly used a predicate based on the
4487 // smallest element type rather than the largest.
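// In other words, a call that was declared to take a <vscale x 8 x i1>
// predicate is rewritten to take the <vscale x 4 x i1> predicate matching
// the f32 operand, round-tripping the value through svbool with the two
// convert intrinsics emitted below.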
4488 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4489 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4490
4491 if (Args[1]->getType() != BadPredTy)
4492 llvm_unreachable("Unexpected predicate type!");
4493
4494 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4495 BadPredTy, Args[1]);
4496 Args[1] = Builder.CreateIntrinsic(
4497 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4498
4499 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4500 CI->getName());
4501 }
4502
4503 llvm_unreachable("Unhandled Intrinsic!");
4504}
4505
4506static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4507 IRBuilder<> &Builder) {
4508 if (Name == "mve.vctp64.old") {
4509 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4510 // correct type.
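// For illustration (a sketch; value names are hypothetical):
//   %p = call <4 x i1> @llvm.arm.mve.vctp64.old(i32 %n)
// becomes
//   %v = call <2 x i1> @llvm.arm.mve.vctp64(i32 %n)
//   %i = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> %v)
//   %p = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %i)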
4511 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4512 CI->getArgOperand(0),
4513 /*FMFSource=*/nullptr, CI->getName());
4514 Value *C1 = Builder.CreateIntrinsic(
4515 Intrinsic::arm_mve_pred_v2i,
4516 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4517 return Builder.CreateIntrinsic(
4518 Intrinsic::arm_mve_pred_i2v,
4519 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4520 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4521 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4522 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4523 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4524 Name ==
4525 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4526 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4527 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4528 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4529 Name ==
4530 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4531 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4532 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4533 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4534 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4535 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4536 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4537 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4538 std::vector<Type *> Tys;
4539 unsigned ID = CI->getIntrinsicID();
4540 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4541 switch (ID) {
4542 case Intrinsic::arm_mve_mull_int_predicated:
4543 case Intrinsic::arm_mve_vqdmull_predicated:
4544 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4545 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4546 break;
4547 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4548 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4549 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4550 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4551 V2I1Ty};
4552 break;
4553 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4554 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4555 CI->getOperand(1)->getType(), V2I1Ty};
4556 break;
4557 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4558 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4559 CI->getOperand(2)->getType(), V2I1Ty};
4560 break;
4561 case Intrinsic::arm_cde_vcx1q_predicated:
4562 case Intrinsic::arm_cde_vcx1qa_predicated:
4563 case Intrinsic::arm_cde_vcx2q_predicated:
4564 case Intrinsic::arm_cde_vcx2qa_predicated:
4565 case Intrinsic::arm_cde_vcx3q_predicated:
4566 case Intrinsic::arm_cde_vcx3qa_predicated:
4567 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4568 break;
4569 default:
4570 llvm_unreachable("Unhandled Intrinsic!");
4571 }
4572
4573 std::vector<Value *> Ops;
4574 for (Value *Op : CI->args()) {
4575 Type *Ty = Op->getType();
4576 if (Ty->getScalarSizeInBits() == 1) {
4577 Value *C1 = Builder.CreateIntrinsic(
4578 Intrinsic::arm_mve_pred_v2i,
4579 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4580 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4581 }
4582 Ops.push_back(Op);
4583 }
4584
4585 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4586 CI->getName());
4587 }
4588 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4589}
4590
4591// These are expected to have the arguments:
4592// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4593//
4594// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4595//
4596static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4597 Function *F, IRBuilder<> &Builder) {
4598 AtomicRMWInst::BinOp RMWOp =
4599 StringSwitch<AtomicRMWInst::BinOp>(Name)
4600 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4601 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4602 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4603 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4604 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4605 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4606 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4607 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4608 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4609 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4610 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4611 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4612 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4613
4614 unsigned NumOperands = CI->getNumOperands();
4615 if (NumOperands < 3) // Malformed bitcode.
4616 return nullptr;
4617
4618 Value *Ptr = CI->getArgOperand(0);
4619 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4620 if (!PtrTy) // Malformed.
4621 return nullptr;
4622
4623 Value *Val = CI->getArgOperand(1);
4624 if (Val->getType() != CI->getType()) // Malformed.
4625 return nullptr;
4626
4627 ConstantInt *OrderArg = nullptr;
4628 bool IsVolatile = false;
4629
4630 // These should have 5 arguments (plus the callee). A separate version of the
4631 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4632 if (NumOperands > 3)
4633 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4634
4635 // Ignore scope argument at 3
4636
4637 if (NumOperands > 5) {
4638 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4639 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4640 }
4641
4642 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4643 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4644 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4645 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4646 Order = AtomicOrdering::SequentiallyConsistent;
4647
4648 LLVMContext &Ctx = F->getContext();
4649
4650 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4651 Type *RetTy = CI->getType();
4652 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4653 if (VT->getElementType()->isIntegerTy(16)) {
4654 VectorType *AsBF16 =
4655 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4656 Val = Builder.CreateBitCast(Val, AsBF16);
4657 }
4658 }
4659
4660 // The scope argument never really worked correctly. Use agent as the most
4661 // conservative option which should still always produce the instruction.
4662 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4663 AtomicRMWInst *RMW =
4664 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4665
4666 unsigned AddrSpace = PtrTy->getAddressSpace();
4667 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4668 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4669 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4670 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4671 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4672 }
4673
4674 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4675 MDBuilder MDB(F->getContext());
4677 MDNode *RangeNotPrivate =
4678 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS), APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4679 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4680 }
4681
4682 if (IsVolatile)
4683 RMW->setVolatile(true);
4684
4685 return Builder.CreateBitCast(RMW, RetTy);
4686}
4687
4688/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4689/// plain MDNode, as it's the verifier's job to check these are the correct
4690/// types later.
4691static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4692 if (Op < CI->arg_size()) {
4693 if (MetadataAsValue *MAV =
4694 dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) {
4695 Metadata *MD = MAV->getMetadata();
4696 return dyn_cast_if_present<MDNode>(MD);
4697 }
4698 }
4699 return nullptr;
4700}
4701
4702/// Helper to unwrap MetadataAsValue operands that wrap plain Metadata, such as the Value field.
4703static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4704 if (Op < CI->arg_size())
4705 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4706 return MAV->getMetadata();
4707 return nullptr;
4708}
4709
4710static MDNode *getDebugLocSafe(const Instruction *I) {
4711 // The MDNode attached to this instruction might not be the correct type,
4712 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4713 return I->getDebugLoc().getAsMDNode();
4714}
4715
4716/// Convert debug intrinsic calls to non-instruction debug records.
4717/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4718/// \p CI - The debug intrinsic call.
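/// For illustration (a sketch; metadata numbers are hypothetical):
///   call void @llvm.dbg.value(metadata i32 %x, metadata !10,
///                             metadata !DIExpression()), !dbg !15
/// becomes the non-instruction record
///   #dbg_value(i32 %x, !10, !DIExpression(), !15)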
4719static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4720 DbgRecord *DR = nullptr;
4721 if (Name == "label") {
4722 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
4723 CI->getDebugLoc());
4724 } else if (Name == "assign") {
4725 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4726 DbgVariableRecord::LocationType::Assign, unwrapMAVMetadataOp(CI, 0),
4727 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4728 unwrapMAVMetadataOp(CI, 4),
4729 /* The address is a Value ref; it will be stored as Metadata. */
4730 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4731 } else if (Name == "declare") {
4732 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4733 DbgVariableRecord::LocationType::Declare, unwrapMAVMetadataOp(CI, 0),
4734 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4735 getDebugLocSafe(CI));
4736 } else if (Name == "addr") {
4737 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
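// That is, dbg.addr(metadata ptr %p, metadata !var, metadata !expr) becomes
// a #dbg_value of %p whose expression is !expr with DW_OP_deref appended
// (names illustrative).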
4738 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4739 // Don't try to add something to the expression if it's not an expression.
4740 // Instead, allow the verifier to fail later.
4741 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4742 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4743 }
4744 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4745 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4746 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4747 getDebugLocSafe(CI));
4748 } else if (Name == "value") {
4749 // An old version of dbg.value had an extra offset argument.
4750 unsigned VarOp = 1;
4751 unsigned ExprOp = 2;
4752 if (CI->arg_size() == 4) {
4753 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4754 // Nonzero offset dbg.values get dropped without a replacement.
4755 if (!Offset || !Offset->isZeroValue())
4756 return;
4757 VarOp = 2;
4758 ExprOp = 3;
4759 }
4760 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4761 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4762 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4763 nullptr, getDebugLocSafe(CI));
4764 }
4765 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4766 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4767}
4768
4769/// Upgrade a call to an old intrinsic. All argument and return casting must be
4770/// provided to seamlessly integrate with existing context.
4771void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4772 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4773 // checks the callee's function type matches. It's likely we need to handle
4774 // type changes here.
4775 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4776 if (!F)
4777 return;
4778
4779 LLVMContext &C = CI->getContext();
4780 IRBuilder<> Builder(C);
4781 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4782
4783 if (!NewFn) {
4784 // Get the Function's name.
4785 StringRef Name = F->getName();
4786
4787 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4788 Name = Name.substr(5);
4789
4790 bool IsX86 = Name.consume_front("x86.");
4791 bool IsNVVM = Name.consume_front("nvvm.");
4792 bool IsAArch64 = Name.consume_front("aarch64.");
4793 bool IsARM = Name.consume_front("arm.");
4794 bool IsAMDGCN = Name.consume_front("amdgcn.");
4795 bool IsDbg = Name.consume_front("dbg.");
4796 Value *Rep = nullptr;
4797
4798 if (!IsX86 && Name == "stackprotectorcheck") {
4799 Rep = nullptr;
4800 } else if (IsNVVM) {
4801 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4802 } else if (IsX86) {
4803 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4804 } else if (IsAArch64) {
4805 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4806 } else if (IsARM) {
4807 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4808 } else if (IsAMDGCN) {
4809 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4810 } else if (IsDbg) {
4811 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4812 } else {
4813 llvm_unreachable("Unknown function for CallBase upgrade.");
4814 }
4815
4816 if (Rep)
4817 CI->replaceAllUsesWith(Rep);
4818 CI->eraseFromParent();
4819 return;
4820 }
4821
4822 const auto &DefaultCase = [&]() -> void {
4823 if (F == NewFn)
4824 return;
4825
4826 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4827 // Handle generic mangling change.
4828 assert(
4829 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4830 "Unknown function for CallBase upgrade and isn't just a name change");
4831 CI->setCalledFunction(NewFn);
4832 return;
4833 }
4834
4835 // This must be an upgrade from a named to a literal struct.
4836 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4837 assert(OldST != NewFn->getReturnType() &&
4838 "Return type must have changed");
4839 assert(OldST->getNumElements() ==
4840 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4841 "Must have same number of elements");
4842
4843 SmallVector<Value *> Args(CI->args());
4844 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4845 NewCI->setAttributes(CI->getAttributes());
4846 Value *Res = PoisonValue::get(OldST);
4847 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4848 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4849 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4850 }
4851 CI->replaceAllUsesWith(Res);
4852 CI->eraseFromParent();
4853 return;
4854 }
4855
4856 // We're probably about to produce something invalid. Let the verifier catch
4857 // it instead of dying here.
4858 CI->setCalledOperand(
4859 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4860 return;
4861 };
4862 CallInst *NewCall = nullptr;
4863 switch (NewFn->getIntrinsicID()) {
4864 default: {
4865 DefaultCase();
4866 return;
4867 }
4868 case Intrinsic::arm_neon_vst1:
4869 case Intrinsic::arm_neon_vst2:
4870 case Intrinsic::arm_neon_vst3:
4871 case Intrinsic::arm_neon_vst4:
4872 case Intrinsic::arm_neon_vst2lane:
4873 case Intrinsic::arm_neon_vst3lane:
4874 case Intrinsic::arm_neon_vst4lane: {
4875 SmallVector<Value *, 4> Args(CI->args());
4876 NewCall = Builder.CreateCall(NewFn, Args);
4877 break;
4878 }
4879 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4880 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4881 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4882 LLVMContext &Ctx = F->getParent()->getContext();
4883 SmallVector<Value *, 4> Args(CI->args());
4884 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4885 cast<ConstantInt>(Args[3])->getZExtValue());
4886 NewCall = Builder.CreateCall(NewFn, Args);
4887 break;
4888 }
4889 case Intrinsic::aarch64_sve_ld3_sret:
4890 case Intrinsic::aarch64_sve_ld4_sret:
4891 case Intrinsic::aarch64_sve_ld2_sret: {
4892 StringRef Name = F->getName();
4893 Name = Name.substr(5);
4894 unsigned N = StringSwitch<unsigned>(Name)
4895 .StartsWith("aarch64.sve.ld2", 2)
4896 .StartsWith("aarch64.sve.ld3", 3)
4897 .StartsWith("aarch64.sve.ld4", 4)
4898 .Default(0);
4899 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4900 unsigned MinElts = RetTy->getMinNumElements() / N;
4901 SmallVector<Value *, 2> Args(CI->args());
4902 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4903 Value *Ret = llvm::PoisonValue::get(RetTy);
4904 for (unsigned I = 0; I < N; I++) {
4905 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4906 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
4907 }
4908 NewCall = dyn_cast<CallInst>(Ret);
4909 break;
4910 }
4911
4912 case Intrinsic::coro_end: {
4913 SmallVector<Value *, 3> Args(CI->args());
4914 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4915 NewCall = Builder.CreateCall(NewFn, Args);
4916 break;
4917 }
4918
4919 case Intrinsic::vector_extract: {
4920 StringRef Name = F->getName();
4921 Name = Name.substr(5); // Strip llvm
4922 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4923 DefaultCase();
4924 return;
4925 }
4926 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4927 unsigned MinElts = RetTy->getMinNumElements();
4928 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4929 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4930 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4931 break;
4932 }
4933
4934 case Intrinsic::vector_insert: {
4935 StringRef Name = F->getName();
4936 Name = Name.substr(5);
4937 if (!Name.starts_with("aarch64.sve.tuple")) {
4938 DefaultCase();
4939 return;
4940 }
4941 if (Name.starts_with("aarch64.sve.tuple.set")) {
4942 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4943 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4944 Value *NewIdx =
4945 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4946 NewCall = Builder.CreateCall(
4947 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4948 break;
4949 }
4950 if (Name.starts_with("aarch64.sve.tuple.create")) {
4951 unsigned N = StringSwitch<unsigned>(Name)
4952 .StartsWith("aarch64.sve.tuple.create2", 2)
4953 .StartsWith("aarch64.sve.tuple.create3", 3)
4954 .StartsWith("aarch64.sve.tuple.create4", 4)
4955 .Default(0);
4956 assert(N > 1 && "Create is expected to be between 2-4");
4957 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4958 Value *Ret = llvm::PoisonValue::get(RetTy);
4959 unsigned MinElts = RetTy->getMinNumElements() / N;
4960 for (unsigned I = 0; I < N; I++) {
4961 Value *V = CI->getArgOperand(I);
4962 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
4963 }
4964 NewCall = dyn_cast<CallInst>(Ret);
4965 }
4966 break;
4967 }
4968
4969 case Intrinsic::arm_neon_bfdot:
4970 case Intrinsic::arm_neon_bfmmla:
4971 case Intrinsic::arm_neon_bfmlalb:
4972 case Intrinsic::arm_neon_bfmlalt:
4973 case Intrinsic::aarch64_neon_bfdot:
4974 case Intrinsic::aarch64_neon_bfmmla:
4975 case Intrinsic::aarch64_neon_bfmlalb:
4976 case Intrinsic::aarch64_neon_bfmlalt: {
4977 SmallVector<Value *, 3> Args;
4978 assert(CI->arg_size() == 3 &&
4979 "Mismatch between function args and call args");
4980 size_t OperandWidth =
4981 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4982 assert((OperandWidth == 64 || OperandWidth == 128) &&
4983 "Unexpected operand width");
4984 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4985 auto Iter = CI->args().begin();
4986 Args.push_back(*Iter++);
4987 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4988 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4989 NewCall = Builder.CreateCall(NewFn, Args);
4990 break;
4991 }
4992
4993 case Intrinsic::bitreverse:
4994 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4995 break;
4996
4997 case Intrinsic::ctlz:
4998 case Intrinsic::cttz:
4999 assert(CI->arg_size() == 1 &&
5000 "Mismatch between function args and call args");
5001 NewCall =
5002 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5003 break;
5004
5005 case Intrinsic::objectsize: {
5006 Value *NullIsUnknownSize =
5007 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5008 Value *Dynamic =
5009 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5010 NewCall = Builder.CreateCall(
5011 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5012 break;
5013 }
5014
5015 case Intrinsic::ctpop:
5016 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5017 break;
5018
5019 case Intrinsic::convert_from_fp16:
5020 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5021 break;
5022
5023 case Intrinsic::dbg_value: {
5024 StringRef Name = F->getName();
5025 Name = Name.substr(5); // Strip llvm.
5026 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5027 if (Name.starts_with("dbg.addr")) {
5028 DIExpression *Expr = cast<DIExpression>(
5029 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5030 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5031 NewCall =
5032 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5033 MetadataAsValue::get(C, Expr)});
5034 break;
5035 }
5036
5037 // Upgrade from the old version that had an extra offset argument.
5038 assert(CI->arg_size() == 4);
5039 // Drop nonzero offsets instead of attempting to upgrade them.
5040 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
5041 if (Offset->isZeroValue()) {
5042 NewCall = Builder.CreateCall(
5043 NewFn,
5044 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5045 break;
5046 }
5047 CI->eraseFromParent();
5048 return;
5049 }
5050
5051 case Intrinsic::ptr_annotation:
5052 // Upgrade from versions that lacked the annotation attribute argument.
5053 if (CI->arg_size() != 4) {
5054 DefaultCase();
5055 return;
5056 }
5057
5058 // Create a new call with an added null annotation attribute argument.
5059 NewCall = Builder.CreateCall(
5060 NewFn,
5061 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5062 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5063 NewCall->takeName(CI);
5064 CI->replaceAllUsesWith(NewCall);
5065 CI->eraseFromParent();
5066 return;
5067
5068 case Intrinsic::var_annotation:
5069 // Upgrade from versions that lacked the annotation attribute argument.
5070 if (CI->arg_size() != 4) {
5071 DefaultCase();
5072 return;
5073 }
5074 // Create a new call with an added null annotation attribute argument.
5075 NewCall = Builder.CreateCall(
5076 NewFn,
5077 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5078 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5079 NewCall->takeName(CI);
5080 CI->replaceAllUsesWith(NewCall);
5081 CI->eraseFromParent();
5082 return;
5083
5084 case Intrinsic::riscv_aes32dsi:
5085 case Intrinsic::riscv_aes32dsmi:
5086 case Intrinsic::riscv_aes32esi:
5087 case Intrinsic::riscv_aes32esmi:
5088 case Intrinsic::riscv_sm4ks:
5089 case Intrinsic::riscv_sm4ed: {
5090 // The last argument to these intrinsics used to be i8 and changed to i32.
5091 // The type overload for sm4ks and sm4ed was removed.
5092 Value *Arg2 = CI->getArgOperand(2);
5093 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5094 return;
5095
5096 Value *Arg0 = CI->getArgOperand(0);
5097 Value *Arg1 = CI->getArgOperand(1);
5098 if (CI->getType()->isIntegerTy(64)) {
5099 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5100 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5101 }
5102
5103 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5104 cast<ConstantInt>(Arg2)->getZExtValue());
5105
5106 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5107 Value *Res = NewCall;
5108 if (Res->getType() != CI->getType())
5109 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5110 NewCall->takeName(CI);
5111 CI->replaceAllUsesWith(Res);
5112 CI->eraseFromParent();
5113 return;
5114 }
5115 case Intrinsic::nvvm_mapa_shared_cluster: {
5116 // Create a new call with the correct address space.
5117 NewCall =
5118 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5119 Value *Res = NewCall;
5120 Res = Builder.CreateAddrSpaceCast(
5121 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5122 NewCall->takeName(CI);
5123 CI->replaceAllUsesWith(Res);
5124 CI->eraseFromParent();
5125 return;
5126 }
5127 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5128 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5129 // Create a new call with the correct address space.
5130 SmallVector<Value *, 4> Args(CI->args());
5131 Args[0] = Builder.CreateAddrSpaceCast(
5132 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5133
5134 NewCall = Builder.CreateCall(NewFn, Args);
5135 NewCall->takeName(CI);
5136 CI->replaceAllUsesWith(NewCall);
5137 CI->eraseFromParent();
5138 return;
5139 }
5140 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5141 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5142 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5143 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5144 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5145 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5146 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5147 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5148 SmallVector<Value *, 16> Args(CI->args());
5149
5150 // Create AddrSpaceCast to shared_cluster if needed.
5151 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5152 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5153 if (AS != NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER)
5154 Args[0] = Builder.CreateAddrSpaceCast(
5155 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5156
5157 // Attach the flag argument for cta_group, with a
5158 // default value of 0. This handles case (2) in
5159 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5160 size_t NumArgs = CI->arg_size();
5161 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5162 if (!FlagArg->getType()->isIntegerTy(1))
5163 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5164
5165 NewCall = Builder.CreateCall(NewFn, Args);
5166 NewCall->takeName(CI);
5167 CI->replaceAllUsesWith(NewCall);
5168 CI->eraseFromParent();
5169 return;
5170 }
5171 case Intrinsic::riscv_sha256sig0:
5172 case Intrinsic::riscv_sha256sig1:
5173 case Intrinsic::riscv_sha256sum0:
5174 case Intrinsic::riscv_sha256sum1:
5175 case Intrinsic::riscv_sm3p0:
5176 case Intrinsic::riscv_sm3p1: {
5177 // The i64 type overload for these intrinsics was removed. Truncate the
5178 // argument to i32 and sign extend the i32 result back to i64.
5179 if (!CI->getType()->isIntegerTy(64))
5180 return;
5181
5182 Value *Arg =
5183 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5184
5185 NewCall = Builder.CreateCall(NewFn, Arg);
5186 Value *Res =
5187 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5188 NewCall->takeName(CI);
5189 CI->replaceAllUsesWith(Res);
5190 CI->eraseFromParent();
5191 return;
5192 }
5193
5194 case Intrinsic::x86_xop_vfrcz_ss:
5195 case Intrinsic::x86_xop_vfrcz_sd:
5196 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5197 break;
5198
5199 case Intrinsic::x86_xop_vpermil2pd:
5200 case Intrinsic::x86_xop_vpermil2ps:
5201 case Intrinsic::x86_xop_vpermil2pd_256:
5202 case Intrinsic::x86_xop_vpermil2ps_256: {
5203 SmallVector<Value *, 4> Args(CI->args());
5204 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5205 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5206 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5207 NewCall = Builder.CreateCall(NewFn, Args);
5208 break;
5209 }
5210
5211 case Intrinsic::x86_sse41_ptestc:
5212 case Intrinsic::x86_sse41_ptestz:
5213 case Intrinsic::x86_sse41_ptestnzc: {
5214 // The arguments for these intrinsics used to be v4f32, and changed
5215 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5216 // So, the only thing required is a bitcast for both arguments.
5217 // First, check the arguments have the old type.
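// For illustration (a sketch; value names are hypothetical), both operands
// are rewritten as:
//   %bc0 = bitcast <4 x float> %a to <2 x i64>
//   %bc1 = bitcast <4 x float> %b to <2 x i64>
// before calling the new declaration.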
5218 Value *Arg0 = CI->getArgOperand(0);
5219 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5220 return;
5221
5222 // Old intrinsic, add bitcasts
5223 Value *Arg1 = CI->getArgOperand(1);
5224
5225 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5226
5227 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5228 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5229
5230 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5231 break;
5232 }
5233
5234 case Intrinsic::x86_rdtscp: {
5235 // This used to take one argument. If we have no arguments, it has already
5236 // been upgraded.
5237 if (CI->getNumOperands() == 0)
5238 return;
5239
5240 NewCall = Builder.CreateCall(NewFn);
5241 // Extract the second result and store it.
5242 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5243 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5244 // Replace the original call result with the first result of the new call.
5245 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5246
5247 NewCall->takeName(CI);
5248 CI->replaceAllUsesWith(TSC);
5249 CI->eraseFromParent();
5250 return;
5251 }
5252
5253 case Intrinsic::x86_sse41_insertps:
5254 case Intrinsic::x86_sse41_dppd:
5255 case Intrinsic::x86_sse41_dpps:
5256 case Intrinsic::x86_sse41_mpsadbw:
5257 case Intrinsic::x86_avx_dp_ps_256:
5258 case Intrinsic::x86_avx2_mpsadbw: {
5259 // Need to truncate the last argument from i32 to i8 -- this argument models
5260 // an inherently 8-bit immediate operand to these x86 instructions.
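// For example (illustrative), a constant i32 7 in the immediate position is
// simply truncated to i8 7 before the call is re-emitted.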
5261 SmallVector<Value *, 4> Args(CI->args());
5262
5263 // Replace the last argument with a trunc.
5264 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5265 NewCall = Builder.CreateCall(NewFn, Args);
5266 break;
5267 }
5268
5269 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5270 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5271 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5272 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5273 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5274 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5275 SmallVector<Value *, 4> Args(CI->args());
5276 unsigned NumElts =
5277 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5278 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5279
5280 NewCall = Builder.CreateCall(NewFn, Args);
5281 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5282
5283 NewCall->takeName(CI);
5284 CI->replaceAllUsesWith(Res);
5285 CI->eraseFromParent();
5286 return;
5287 }
5288
5289 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5290 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5291 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5292 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5293 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5294 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5295 SmallVector<Value *, 4> Args(CI->args());
5296 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5297 if (NewFn->getIntrinsicID() ==
5298 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5299 Args[1] = Builder.CreateBitCast(
5300 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5301
5302 NewCall = Builder.CreateCall(NewFn, Args);
5303 Value *Res = Builder.CreateBitCast(
5304 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5305
5306 NewCall->takeName(CI);
5307 CI->replaceAllUsesWith(Res);
5308 CI->eraseFromParent();
5309 return;
5310 }
5311 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5312 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5313 case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
5314 SmallVector<Value *, 4> Args(CI->args());
5315 unsigned NumElts =
5316 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5317 Args[1] = Builder.CreateBitCast(
5318 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5319 Args[2] = Builder.CreateBitCast(
5320 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5321
5322 NewCall = Builder.CreateCall(NewFn, Args);
5323 break;
5324 }
5325
5326 case Intrinsic::thread_pointer: {
5327 NewCall = Builder.CreateCall(NewFn, {});
5328 break;
5329 }
5330
5331 case Intrinsic::memcpy:
5332 case Intrinsic::memmove:
5333 case Intrinsic::memset: {
5334 // We have to make sure that the call signature is what we're expecting.
5335 // We only want to change the old signatures by removing the alignment arg:
5336 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5337 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5338 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5339 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5340 // Note: i8*'s in the above can be any pointer type
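// For illustration (a sketch; value names are hypothetical):
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 4,
//                                        i1 false)
// becomes
//   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %d, ptr align 4 %s,
//                                    i64 %n, i1 false)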
5341 if (CI->arg_size() != 5) {
5342 DefaultCase();
5343 return;
5344 }
5345 // Remove alignment argument (3), and add alignment attributes to the
5346 // dest/src pointers.
5347 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5348 CI->getArgOperand(2), CI->getArgOperand(4)};
5349 NewCall = Builder.CreateCall(NewFn, Args);
5350 AttributeList OldAttrs = CI->getAttributes();
5351 AttributeList NewAttrs = AttributeList::get(
5352 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5353 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5354 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5355 NewCall->setAttributes(NewAttrs);
5356 auto *MemCI = cast<MemIntrinsic>(NewCall);
5357 // All mem intrinsics support dest alignment.
5358 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
5359 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5360 // Memcpy/Memmove also support source alignment.
5361 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5362 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5363 break;
5364 }
5365
5366 case Intrinsic::masked_load:
5367 case Intrinsic::masked_gather:
5368 case Intrinsic::masked_store:
5369 case Intrinsic::masked_scatter: {
5370 if (CI->arg_size() != 4) {
5371 DefaultCase();
5372 return;
5373 }
5374
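// The old forms carried the alignment as an explicit integer operand, e.g.
// (a sketch; value names are hypothetical):
//   %v = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4,
//            <4 x i1> %m, <4 x float> %passthru)
// The helpers below turn that operand into an Align for the new builders.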
5375 auto GetMaybeAlign = [](Value *Op) {
5376 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5377 uint64_t Val = CI->getZExtValue();
5378 if (Val == 0)
5379 return MaybeAlign();
5380 if (isPowerOf2_64(Val))
5381 return MaybeAlign(Val);
5382 }
5383 reportFatalUsageError("Invalid alignment argument");
5384 };
5385 auto GetAlign = [&](Value *Op) {
5386 MaybeAlign Align = GetMaybeAlign(Op);
5387 if (Align)
5388 return *Align;
5389 reportFatalUsageError("Invalid zero alignment argument");
5390 };
5391
5392 const DataLayout &DL = CI->getDataLayout();
5393 switch (NewFn->getIntrinsicID()) {
5394 case Intrinsic::masked_load:
5395 NewCall = Builder.CreateMaskedLoad(
5396 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5397 CI->getArgOperand(2), CI->getArgOperand(3));
5398 break;
5399 case Intrinsic::masked_gather:
5400 NewCall = Builder.CreateMaskedGather(
5401 CI->getType(), CI->getArgOperand(0),
5402 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5403 CI->getType()->getScalarType()),
5404 CI->getArgOperand(2), CI->getArgOperand(3));
5405 break;
5406 case Intrinsic::masked_store:
5407 NewCall = Builder.CreateMaskedStore(
5408 CI->getArgOperand(0), CI->getArgOperand(1),
5409 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5410 break;
5411 case Intrinsic::masked_scatter:
5412 NewCall = Builder.CreateMaskedScatter(
5413 CI->getArgOperand(0), CI->getArgOperand(1),
5414 DL.getValueOrABITypeAlignment(
5415 GetMaybeAlign(CI->getArgOperand(2)),
5416 CI->getArgOperand(0)->getType()->getScalarType()),
5417 CI->getArgOperand(3));
5418 break;
5419 default:
5420 llvm_unreachable("Unexpected intrinsic ID");
5421 }
5422 // Previous metadata is still valid.
5423 NewCall->copyMetadata(*CI);
5424 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5425 break;
5426 }
5427
5428 case Intrinsic::lifetime_start:
5429 case Intrinsic::lifetime_end: {
5430 if (CI->arg_size() != 2) {
5431 DefaultCase();
5432 return;
5433 }
5434
5435 Value *Ptr = CI->getArgOperand(1);
5436 // Try to strip pointer casts, such that the lifetime works on an alloca.
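// For example, a marker whose operand is a bitcast or addrspacecast of an
// alloca is re-emitted directly on the alloca itself; a marker on anything
// other than an alloca is dropped below.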
5437 Ptr = Ptr->stripPointerCasts();
5438 if (isa<AllocaInst>(Ptr)) {
5439 // Don't use NewFn, as we might have looked through an addrspacecast.
5440 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5441 NewCall = Builder.CreateLifetimeStart(Ptr);
5442 else
5443 NewCall = Builder.CreateLifetimeEnd(Ptr);
5444 break;
5445 }
5446
5447 // Otherwise remove the lifetime marker.
5448 CI->eraseFromParent();
5449 return;
5450 }
5451
5452 case Intrinsic::x86_avx512_vpdpbusd_128:
5453 case Intrinsic::x86_avx512_vpdpbusd_256:
5454 case Intrinsic::x86_avx512_vpdpbusd_512:
5455 case Intrinsic::x86_avx512_vpdpbusds_128:
5456 case Intrinsic::x86_avx512_vpdpbusds_256:
5457 case Intrinsic::x86_avx512_vpdpbusds_512:
5458 case Intrinsic::x86_avx2_vpdpbssd_128:
5459 case Intrinsic::x86_avx2_vpdpbssd_256:
5460 case Intrinsic::x86_avx10_vpdpbssd_512:
5461 case Intrinsic::x86_avx2_vpdpbssds_128:
5462 case Intrinsic::x86_avx2_vpdpbssds_256:
5463 case Intrinsic::x86_avx10_vpdpbssds_512:
5464 case Intrinsic::x86_avx2_vpdpbsud_128:
5465 case Intrinsic::x86_avx2_vpdpbsud_256:
5466 case Intrinsic::x86_avx10_vpdpbsud_512:
5467 case Intrinsic::x86_avx2_vpdpbsuds_128:
5468 case Intrinsic::x86_avx2_vpdpbsuds_256:
5469 case Intrinsic::x86_avx10_vpdpbsuds_512:
5470 case Intrinsic::x86_avx2_vpdpbuud_128:
5471 case Intrinsic::x86_avx2_vpdpbuud_256:
5472 case Intrinsic::x86_avx10_vpdpbuud_512:
5473 case Intrinsic::x86_avx2_vpdpbuuds_128:
5474 case Intrinsic::x86_avx2_vpdpbuuds_256:
5475 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5476 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5477 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5478 CI->getArgOperand(2)};
5479 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5480 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5481 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5482
5483 NewCall = Builder.CreateCall(NewFn, Args);
5484 break;
5485 }
5486 case Intrinsic::x86_avx512_vpdpwssd_128:
5487 case Intrinsic::x86_avx512_vpdpwssd_256:
5488 case Intrinsic::x86_avx512_vpdpwssd_512:
5489 case Intrinsic::x86_avx512_vpdpwssds_128:
5490 case Intrinsic::x86_avx512_vpdpwssds_256:
5491 case Intrinsic::x86_avx512_vpdpwssds_512:
5492 case Intrinsic::x86_avx2_vpdpwsud_128:
5493 case Intrinsic::x86_avx2_vpdpwsud_256:
5494 case Intrinsic::x86_avx10_vpdpwsud_512:
5495 case Intrinsic::x86_avx2_vpdpwsuds_128:
5496 case Intrinsic::x86_avx2_vpdpwsuds_256:
5497 case Intrinsic::x86_avx10_vpdpwsuds_512:
5498 case Intrinsic::x86_avx2_vpdpwusd_128:
5499 case Intrinsic::x86_avx2_vpdpwusd_256:
5500 case Intrinsic::x86_avx10_vpdpwusd_512:
5501 case Intrinsic::x86_avx2_vpdpwusds_128:
5502 case Intrinsic::x86_avx2_vpdpwusds_256:
5503 case Intrinsic::x86_avx10_vpdpwusds_512:
5504 case Intrinsic::x86_avx2_vpdpwuud_128:
5505 case Intrinsic::x86_avx2_vpdpwuud_256:
5506 case Intrinsic::x86_avx10_vpdpwuud_512:
5507 case Intrinsic::x86_avx2_vpdpwuuds_128:
5508 case Intrinsic::x86_avx2_vpdpwuuds_256:
5509 case Intrinsic::x86_avx10_vpdpwuuds_512: {
5510 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5511 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5512 CI->getArgOperand(2)};
5513 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5514 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5515 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5516
5517 NewCall = Builder.CreateCall(NewFn, Args);
5518 break;
5519 }
5520 }
5520 assert(NewCall && "Should have either set this variable or returned through "
5521 "the default case");
5522 NewCall->takeName(CI);
5523 CI->replaceAllUsesWith(NewCall);
5524 CI->eraseFromParent();
5525}
5526
5527void llvm::UpgradeCallsToIntrinsic(Function *F) {
5528 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5529
5530 // Check if this function should be upgraded and get the replacement function
5531 // if there is one.
5532 Function *NewFn;
5533 if (UpgradeIntrinsicFunction(F, NewFn)) {
5534 // Replace all users of the old function with the new function or new
5535 // instructions. This is not a range loop because the call is deleted.
5536 for (User *U : make_early_inc_range(F->users()))
5537 if (CallBase *CB = dyn_cast<CallBase>(U))
5538 UpgradeIntrinsicCall(CB, NewFn);
5539
5540 // Remove old function, no longer used, from the module.
5541 if (F != NewFn)
5542 F->eraseFromParent();
5543 }
5544}
5545
5546MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5547 const unsigned NumOperands = MD.getNumOperands();
5548 if (NumOperands == 0)
5549 return &MD; // Invalid, punt to a verifier error.
5550
5551 // Check if the tag uses struct-path aware TBAA format.
5552 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5553 return &MD;
5554
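// For illustration (metadata numbering hypothetical): a scalar-format tag
//   !1 = !{!"int", !0}
// becomes the struct-path tag
//   !2 = !{!1, !1, i64 0}
// making the base type, access type, and offset explicit.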
5555 auto &Context = MD.getContext();
5556 if (NumOperands == 3) {
5557 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5558 MDNode *ScalarType = MDNode::get(Context, Elts);
5559 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5560 Metadata *Elts2[] = {ScalarType, ScalarType,
5561 ConstantAsMetadata::get(
5562 Constant::getNullValue(Type::getInt64Ty(Context))),
5563 MD.getOperand(2)};
5564 return MDNode::get(Context, Elts2);
5565 }
5566 // Create a MDNode <MD, MD, offset 0>
5567 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
5568 Type::getInt64Ty(Context)))};
5569 return MDNode::get(Context, Elts);
5570}
5571
5572Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5573 Instruction *&Temp) {
5574 if (Opc != Instruction::BitCast)
5575 return nullptr;
5576
5577 Temp = nullptr;
5578 Type *SrcTy = V->getType();
5579 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5580 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5581 LLVMContext &Context = V->getContext();
5582
5583 // We have no information about the target data layout, so we assume that
5584 // the maximum pointer size is 64 bits.
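// For illustration (a sketch; value names are hypothetical):
//   %q = bitcast ptr addrspace(1) %p to ptr
// is split into
//   %i = ptrtoint ptr addrspace(1) %p to i64
//   %q = inttoptr i64 %i to ptr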
5585 Type *MidTy = Type::getInt64Ty(Context);
5586 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5587
5588 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5589 }
5590
5591 return nullptr;
5592}
5593
5594Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5595 if (Opc != Instruction::BitCast)
5596 return nullptr;
5597
5598 Type *SrcTy = C->getType();
5599 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5600 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5601 LLVMContext &Context = C->getContext();
5602
5603 // We have no information about the target data layout, so we assume that
5604 // the maximum pointer size is 64 bits.
5605 Type *MidTy = Type::getInt64Ty(Context);
5606
5607 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
5608 DestTy);
5609 }
5610
5611 return nullptr;
5612}
5613
5614/// Check the debug info version number; if it is outdated, drop the debug
5615/// info. Return true if the module is modified.
5616bool llvm::UpgradeDebugInfo(Module &M) {
5617 if (DisableAutoUpgradeDebugInfo)
5618 return false;
5619
5620 llvm::TimeTraceScope timeScope("Upgrade debug info");
5621 // We need to get metadata before the module is verified (i.e., getModuleFlag
5622 // makes assumptions that we haven't verified yet). Carefully extract the flag
5623 // from the metadata.
5624 unsigned Version = 0;
5625 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5626 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5627 if (Flag->getNumOperands() < 3)
5628 return false;
5629 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5630 return K->getString() == "Debug Info Version";
5631 return false;
5632 });
5633 if (OpIt != ModFlags->op_end()) {
5634 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5635 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5636 Version = CI->getZExtValue();
5637 }
5638 }
5639
5640 if (Version == DEBUG_METADATA_VERSION) {
5641 bool BrokenDebugInfo = false;
5642 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5643 report_fatal_error("Broken module found, compilation aborted!");
5644 if (!BrokenDebugInfo)
5645 // Everything is ok.
5646 return false;
5647 else {
5648 // Diagnose malformed debug info.
5649 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5650 M.getContext().diagnose(Diag);
5651 }
5652 }
5653 bool Modified = StripDebugInfo(M);
5654 if (Modified && Version != DEBUG_METADATA_VERSION) {
5655 // Diagnose a version mismatch.
5656 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5657 M.getContext().diagnose(DiagVersion);
5658 }
5659 return Modified;
5660}
5661
5662static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5663 GlobalValue *GV, const Metadata *V) {
5664 Function *F = cast<Function>(GV);
5665
5666 constexpr StringLiteral DefaultValue = "1";
5667 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5668 unsigned Length = 0;
5669
5670 if (F->hasFnAttribute(Attr)) {
5671 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5672 // parse these elements, placing them into Vect3.
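// For example, if the function already carries "nvvm.maxntid"="8,8" and the
// annotation being upgraded is maxntidz with value 2, the merged attribute
// becomes "8,8,2" (values here are illustrative).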
5673 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5674 for (; Length < 3 && !S.empty(); Length++) {
5675 auto [Part, Rest] = S.split(',');
5676 Vect3[Length] = Part.trim();
5677 S = Rest;
5678 }
5679 }
5680
5681 const unsigned Dim = DimC - 'x';
5682 assert(Dim < 3 && "Unexpected dim char");
5683
5684 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5685
5686 // A local variable is required for the StringRef in Vect3 to point to.
5687 const std::string VStr = llvm::utostr(VInt);
5688 Vect3[Dim] = VStr;
5689 Length = std::max(Length, Dim + 1);
5690
5691 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5692 F->addFnAttr(Attr, NewAttr);
5693}
5694
5695static inline bool isXYZ(StringRef S) {
5696 return S == "x" || S == "y" || S == "z";
5697}
5698
5699static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5700 const Metadata *V) {
5701 if (K == "kernel") {
5702 if (!mdconst::extract<ConstantInt>(V)->isZero())
5703 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5704 return true;
5705 }
5706 if (K == "align") {
5707 // V is a bitfield specifying two 16-bit values. The alignment value is
5708 // specified in the low 16 bits, and the index is specified in the high
5709 // bits. For the index, 0 indicates the return value while higher values
5710 // correspond to each parameter (idx = param + 1).
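// For example, an encoded value of 0x00020008 (illustrative) splits into
// index 2 and stack alignment 8, attaching alignstack(8) to the second
// parameter (2 = param index 1 + 1).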
5711 const uint64_t AlignIdxValuePair =
5712 mdconst::extract<ConstantInt>(V)->getZExtValue();
5713 const unsigned Idx = (AlignIdxValuePair >> 16);
5714 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5715 cast<Function>(GV)->addAttributeAtIndex(
5716 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5717 return true;
5718 }
5719 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5720 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5721 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5722 return true;
5723 }
5724 if (K == "minctasm") {
5725 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5726 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5727 return true;
5728 }
5729 if (K == "maxnreg") {
5730 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5731 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5732 return true;
5733 }
5734 if (K.consume_front("maxntid") && isXYZ(K)) {
5735 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5736 return true;
5737 }
5738 if (K.consume_front("reqntid") && isXYZ(K)) {
5739 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5740 return true;
5741 }
5742 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5743 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5744 return true;
5745 }
5746 if (K == "grid_constant") {
5747 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5748 for (const auto &Op : cast<MDNode>(V)->operands()) {
5749 // For some reason, the index is 1-based in the metadata. Good thing we're
5750 // able to auto-upgrade it!
5751 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5752 cast<Function>(GV)->addParamAttr(Index, Attr);
5753 }
5754 return true;
5755 }
5756
5757 return false;
5758}
5759
5760void llvm::UpgradeNVVMAnnotations(Module &M) {
5761 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5762 if (!NamedMD)
5763 return;
5764
5765 SmallVector<MDNode *, 8> NewNodes;
5766 SmallPtrSet<const MDNode *, 8> SeenNodes;
5767 for (MDNode *MD : NamedMD->operands()) {
5768 if (!SeenNodes.insert(MD).second)
5769 continue;
5770
5771 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5772 if (!GV)
5773 continue;
5774
5775 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5776
5777 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5778 // Each nvvm.annotations metadata entry will be of the following form:
5779 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5780 // start index = 1, to skip the global variable key
5781 // increment = 2, to skip the value for each property-value pair
5782 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5783 MDString *K = cast<MDString>(MD->getOperand(j));
5784 const MDOperand &V = MD->getOperand(j + 1);
5785 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5786 if (!Upgraded)
5787 NewOperands.append({K, V});
5788 }
5789
5790 if (NewOperands.size() > 1)
5791 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5792 }
5793
5794 NamedMD->clearOperands();
5795 for (MDNode *N : NewNodes)
5796 NamedMD->addOperand(N);
5797}
5798
5799/// This checks for an objc retain/release marker that should be upgraded. It
5800/// returns true if the module is modified.
5801static bool upgradeRetainReleaseMarker(Module &M) {
5802 bool Changed = false;
5803 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5804 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5805 if (ModRetainReleaseMarker) {
5806 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5807 if (Op) {
5808 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5809 if (ID) {
5810 SmallVector<StringRef, 4> ValueComp;
5811 ID->getString().split(ValueComp, "#");
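// e.g. an old marker string of the form "prefix#suffix" (illustrative)
// becomes "prefix;suffix" before being stored as a module flag.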
5812 if (ValueComp.size() == 2) {
5813 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5814 ID = MDString::get(M.getContext(), NewValue);
5815 }
5816 M.addModuleFlag(Module::Error, MarkerKey, ID);
5817 M.eraseNamedMetadata(ModRetainReleaseMarker);
5818 Changed = true;
5819 }
5820 }
5821 }
5822 return Changed;
5823}
5824
5826 // This lambda converts normal calls to ARC runtime functions into calls to
5827 // the corresponding intrinsics.
5828 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5829 llvm::Intrinsic::ID IntrinsicFunc) {
5830 Function *Fn = M.getFunction(OldFunc);
5831
5832 if (!Fn)
5833 return;
5834
5835 Function *NewFn =
5836 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5837
5838 for (User *U : make_early_inc_range(Fn->users())) {
5839 CallInst *CI = dyn_cast<CallInst>(U);
5840 if (!CI || CI->getCalledFunction() != Fn)
5841 continue;
5842
5843 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5844 FunctionType *NewFuncTy = NewFn->getFunctionType();
5845 SmallVector<Value *, 4> Args;
5846
5847 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5848 // value to the return type of the old function.
5849 if (NewFuncTy->getReturnType() != CI->getType() &&
5850 !CastInst::castIsValid(Instruction::BitCast, CI,
5851 NewFuncTy->getReturnType()))
5852 continue;
5853
5854 bool InvalidCast = false;
5855
5856 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5857 Value *Arg = CI->getArgOperand(I);
5858
5859 // Bitcast argument to the parameter type of the new function if it's
5860 // not a variadic argument.
5861 if (I < NewFuncTy->getNumParams()) {
5862 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5863 // to the parameter type of the new function.
5864 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5865 NewFuncTy->getParamType(I))) {
5866 InvalidCast = true;
5867 break;
5868 }
5869 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5870 }
5871 Args.push_back(Arg);
5872 }
5873
5874 if (InvalidCast)
5875 continue;
5876
5877 // Create a call instruction that calls the new function.
5878 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5879 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5880 NewCall->takeName(CI);
5881
5882 // Bitcast the return value back to the type of the old call.
5883 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5884
5885 if (!CI->use_empty())
5886 CI->replaceAllUsesWith(NewRetVal);
5887 CI->eraseFromParent();
5888 }
5889
5890 if (Fn->use_empty())
5891 Fn->eraseFromParent();
5892 };
5893
5894 // Unconditionally convert a call to "clang.arc.use" to a call to
5895 // "llvm.objc.clang.arc.use".
5896 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5897
5898 // Upgrade the retain release marker. If there is no need to upgrade
5899 // the marker, that means either the module is already new enough to contain
5900 // new intrinsics or it is not ARC. There is no need to upgrade runtime calls.
5901 if (!upgradeRetainReleaseMarker(M))
5902 return;
5903
5904 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5905 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5906 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5907 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5908 {"objc_autoreleaseReturnValue",
5909 llvm::Intrinsic::objc_autoreleaseReturnValue},
5910 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5911 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5912 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5913 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5914 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5915 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5916 {"objc_release", llvm::Intrinsic::objc_release},
5917 {"objc_retain", llvm::Intrinsic::objc_retain},
5918 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5919 {"objc_retainAutoreleaseReturnValue",
5920 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5921 {"objc_retainAutoreleasedReturnValue",
5922 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5923 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5924 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5925 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5926 {"objc_unsafeClaimAutoreleasedReturnValue",
5927 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5928 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5929 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5930 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5931 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5932 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5933 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5934 {"objc_arc_annotation_topdown_bbstart",
5935 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5936 {"objc_arc_annotation_topdown_bbend",
5937 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5938 {"objc_arc_annotation_bottomup_bbstart",
5939 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5940 {"objc_arc_annotation_bottomup_bbend",
5941 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5942
5943 for (auto &I : RuntimeFuncs)
5944 UpgradeToIntrinsic(I.first, I.second);
5945}
5946
5947bool llvm::UpgradeModuleFlags(Module &M) {
5948 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5949 if (!ModFlags)
5950 return false;
5951
5952 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5953 bool HasSwiftVersionFlag = false;
5954 uint8_t SwiftMajorVersion = 0, SwiftMinorVersion = 0;
5955 uint32_t SwiftABIVersion = 0;
5956 auto Int8Ty = Type::getInt8Ty(M.getContext());
5957 auto Int32Ty = Type::getInt32Ty(M.getContext());
5958
5959 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5960 MDNode *Op = ModFlags->getOperand(I);
5961 if (Op->getNumOperands() != 3)
5962 continue;
5963 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5964 if (!ID)
5965 continue;
5966 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5967 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5968 Type::getInt32Ty(M.getContext()), B)),
5969 MDString::get(M.getContext(), ID->getString()),
5970 Op->getOperand(2)};
5971 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5972 Changed = true;
5973 };
5974
5975 if (ID->getString() == "Objective-C Image Info Version")
5976 HasObjCFlag = true;
5977 if (ID->getString() == "Objective-C Class Properties")
5978 HasClassProperties = true;
5979 // Upgrade PIC from Error/Max to Min.
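// For example (illustrative metadata; the module-flag behavior codes are
// Error = 1, Max = 7, Min = 8):
//   !{i32 7, !"PIC Level", i32 2}   becomes   !{i32 8, !"PIC Level", i32 2}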
5980 if (ID->getString() == "PIC Level") {
5981 if (auto *Behavior =
5982 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5983 uint64_t V = Behavior->getLimitedValue();
5984 if (V == Module::Error || V == Module::Max)
5985 SetBehavior(Module::Min);
5986 }
5987 }
5988 // Upgrade "PIE Level" from Error to Max.
5989 if (ID->getString() == "PIE Level")
5990 if (auto *Behavior =
5991 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5992 if (Behavior->getLimitedValue() == Module::Error)
5993 SetBehavior(Module::Max);
5994
5995 // Upgrade branch protection and return address signing module flags. The
5996 // module flag behavior for these fields was Error and is now Min.
5997 if (ID->getString() == "branch-target-enforcement" ||
5998 ID->getString().starts_with("sign-return-address")) {
5999 if (auto *Behavior =
6000 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
6001 if (Behavior->getLimitedValue() == Module::Error) {
6002 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6003 Metadata *Ops[3] = {
6004 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6005 Op->getOperand(1), Op->getOperand(2)};
6006 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6007 Changed = true;
6008 }
6009 }
6010 }
6011
6012 // Upgrade the Objective-C Image Info Section. Remove the whitespace in the
6013 // section name so that llvm-lto will not complain about mismatching
6014 // module flags that are functionally the same.
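// For example, a section string of the form
//   "__DATA, __objc_imageinfo, regular, no_dead_strip"
// is canonicalized to
//   "__DATA,__objc_imageinfo,regular,no_dead_strip"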
6015 if (ID->getString() == "Objective-C Image Info Section") {
6016 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6017 SmallVector<StringRef, 4> ValueComp;
6018 Value->getString().split(ValueComp, " ");
6019 if (ValueComp.size() != 1) {
6020 std::string NewValue;
6021 for (auto &S : ValueComp)
6022 NewValue += S.str();
6023 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6024 MDString::get(M.getContext(), NewValue)};
6025 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6026 Changed = true;
6027 }
6028 }
6029 }
6030
6031 // The IR upgrader turns the i32 "Objective-C Garbage Collection" flag into
6032 // an i8 value. If the higher bits are set, it adds module flags for Swift.
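// From the masks below, the packed i32 layout is:
//   bits 0-7   GC mode (retained in the upgraded i8 flag)
//   bits 8-15  Swift ABI version
//   bits 16-23 Swift minor version
//   bits 24-31 Swift major version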
6033 if (ID->getString() == "Objective-C Garbage Collection") {
6034 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6035 if (Md) {
6036 assert(Md->getValue() && "Expected non-empty metadata");
6037 auto Type = Md->getValue()->getType();
6038 if (Type == Int8Ty)
6039 continue;
6040 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6041 if ((Val & 0xff) != Val) {
6042 HasSwiftVersionFlag = true;
6043 SwiftABIVersion = (Val & 0xff00) >> 8;
6044 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6045 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6046 }
6047 Metadata *Ops[3] = {
6048 Op->getOperand(0),
6049 Op->getOperand(1),
6050 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
6051 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6052 Changed = true;
6053 }
6054 }
6055
6056 if (ID->getString() == "amdgpu_code_object_version") {
6057 Metadata *Ops[3] = {
6058 Op->getOperand(0),
6059 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6060 Op->getOperand(2)};
6061 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6062 Changed = true;
6063 }
6064 }
6065
6066 // "Objective-C Class Properties" is recently added for Objective-C. We
6067 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
6068 // flag of value 0, so we can correclty downgrade this flag when trying to
6069 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6070 // this module flag.
6071 if (HasObjCFlag && !HasClassProperties) {
6072 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6073 (uint32_t)0);
6074 Changed = true;
6075 }
6076
6077 if (HasSwiftVersionFlag) {
6078 M.addModuleFlag(Module::Error, "Swift ABI Version",
6079 SwiftABIVersion);
6080 M.addModuleFlag(Module::Error, "Swift Major Version",
6081 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6082 M.addModuleFlag(Module::Error, "Swift Minor Version",
6083 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6084 Changed = true;
6085 }
6086
6087 return Changed;
6088}
6089
6090void llvm::UpgradeSectionAttributes(Module &M) {
6091 auto TrimSpaces = [](StringRef Section) -> std::string {
6092 SmallVector<StringRef, 5> Components;
6093 Section.split(Components, ',');
6094
6095 SmallString<32> Buffer;
6096 raw_svector_ostream OS(Buffer);
6097
6098 for (auto Component : Components)
6099 OS << ',' << Component.trim();
6100
6101 return std::string(OS.str().substr(1));
6102 };
6103
6104 for (auto &GV : M.globals()) {
6105 if (!GV.hasSection())
6106 continue;
6107
6108 StringRef Section = GV.getSection();
6109
6110 if (!Section.starts_with("__DATA, __objc_catlist"))
6111 continue;
6112
6113 // __DATA, __objc_catlist, regular, no_dead_strip
6114 // __DATA,__objc_catlist,regular,no_dead_strip
6115 GV.setSection(TrimSpaces(Section));
6116 }
6117}
6118
6119namespace {
6120// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6121// callsites within a function that did not also have the strictfp attribute.
6122// Since 10.0, if strict FP semantics are needed within a function, the
6123// function must have the strictfp attribute and all calls within the function
6124// must also have the strictfp attribute. This latter restriction is
6125// necessary to prevent unwanted libcall simplification when a function is
6126// being cloned (such as for inlining).
6127//
6128// The "dangling" strictfp attribute usage was only used to prevent constant
6129// folding and other libcall simplification. The nobuiltin attribute on the
6130// callsite has the same effect.
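// For example (illustrative IR), inside a caller that lacks the strictfp
// attribute, a callsite such as
//   call double @sin(double %d) strictfp
// is rewritten to
//   call double @sin(double %d) nobuiltin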
6131struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6132 StrictFPUpgradeVisitor() = default;
6133
6134 void visitCallBase(CallBase &Call) {
6135 if (!Call.isStrictFP())
6136 return;
6137 if (isa<ConstrainedFPIntrinsic>(&Call))
6138 return;
6139 // If we get here, the caller doesn't have the strictfp attribute
6140 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6141 Call.removeFnAttr(Attribute::StrictFP);
6142 Call.addFnAttr(Attribute::NoBuiltin);
6143 }
6144};
6145
6146/// Replace the "amdgpu-unsafe-fp-atomics" attribute with atomicrmw metadata.
6147struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6148 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6149 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6150
6151 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6152 if (!RMW.isFloatingPointOperation())
6153 return;
6154
6155 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6156 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6157 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6158 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6159 }
6160};
6161} // namespace
6162
6163void llvm::UpgradeFunctionAttributes(Function &F) {
6164 // If a function definition doesn't have the strictfp attribute,
6165 // convert any callsite strictfp attributes to nobuiltin.
6166 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6167 StrictFPUpgradeVisitor SFPV;
6168 SFPV.visit(F);
6169 }
6170
6171 // Remove all incompatible attributes from the function.
6172 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6173 F.getReturnType(), F.getAttributes().getRetAttrs()));
6174 for (auto &Arg : F.args())
6175 Arg.removeAttrs(
6176 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6177
6178 // Older versions of LLVM treated an "implicit-section-name" attribute
6179 // similarly to directly setting the section on a Function.
6180 if (Attribute A = F.getFnAttribute("implicit-section-name");
6181 A.isValid() && A.isStringAttribute()) {
6182 F.setSection(A.getValueAsString());
6183 F.removeFnAttr("implicit-section-name");
6184 }
6185
6186 if (!F.empty()) {
6187 // For some reason this is called twice, and the first time is before any
6188 // instructions are loaded into the body.
6189
6190 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6191 A.isValid()) {
6192
6193 if (A.getValueAsBool()) {
6194 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6195 Visitor.visit(F);
6196 }
6197
6198 // We will leave behind dead attribute uses on external declarations, but
6199 // clang never added these to declarations anyway.
6200 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
6201 }
6202 }
6203}
6204
6205// Set the function attribute if it is not already present.
6206static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6207 StringRef Value) {
6208 if (!F.hasFnAttribute(FnAttrName))
6209 F.addFnAttr(FnAttrName, Value);
6210}
6211
6212// Set the function attribute if it is not already present. If the attribute
6213// has the value "false", remove it; if it has the value "true", reset it to
6214// a valueless attribute.
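// For example, "branch-target-enforcement"="true" becomes the valueless
// "branch-target-enforcement", and "branch-target-enforcement"="false" is
// removed entirely.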
6215static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6216 if (!F.hasFnAttribute(FnAttrName)) {
6217 if (Set)
6218 F.addFnAttr(FnAttrName);
6219 } else {
6220 auto A = F.getFnAttribute(FnAttrName);
6221 if ("false" == A.getValueAsString())
6222 F.removeFnAttr(FnAttrName);
6223 else if ("true" == A.getValueAsString()) {
6224 F.removeFnAttr(FnAttrName);
6225 F.addFnAttr(FnAttrName);
6226 }
6227 }
6228}
6229
6230void llvm::copyModuleAttrToFunctions(Module &M) {
6231 Triple T(M.getTargetTriple());
6232 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6233 return;
6234
6235 uint64_t BTEValue = 0;
6236 uint64_t BPPLRValue = 0;
6237 uint64_t GCSValue = 0;
6238 uint64_t SRAValue = 0;
6239 uint64_t SRAALLValue = 0;
6240 uint64_t SRABKeyValue = 0;
6241
6242 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6243 if (ModFlags) {
6244 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6245 MDNode *Op = ModFlags->getOperand(I);
6246 if (Op->getNumOperands() != 3)
6247 continue;
6248
6249 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6250 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6251 if (!ID || !CI)
6252 continue;
6253
6254 StringRef IDStr = ID->getString();
6255 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6256 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6257 : IDStr == "guarded-control-stack" ? &GCSValue
6258 : IDStr == "sign-return-address" ? &SRAValue
6259 : IDStr == "sign-return-address-all" ? &SRAALLValue
6260 : IDStr == "sign-return-address-with-bkey"
6261 ? &SRABKeyValue
6262 : nullptr;
6263 if (!ValPtr)
6264 continue;
6265
6266 *ValPtr = CI->getZExtValue();
6267 if (*ValPtr == 2)
6268 return;
6269 }
6270 }
6271
6272 bool BTE = BTEValue == 1;
6273 bool BPPLR = BPPLRValue == 1;
6274 bool GCS = GCSValue == 1;
6275 bool SRA = SRAValue == 1;
6276
6277 StringRef SignTypeValue = "non-leaf";
6278 if (SRA && SRAALLValue == 1)
6279 SignTypeValue = "all";
6280
6281 StringRef SignKeyValue = "a_key";
6282 if (SRA && SRABKeyValue == 1)
6283 SignKeyValue = "b_key";
6284
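// For example (an illustrative module), the module flags
//   !{i32 8, !"sign-return-address", i32 1}
//   !{i32 8, !"sign-return-address-all", i32 1}
// produce "sign-return-address"="all" and "sign-return-address-key"="a_key"
// on every function definition; the module flags themselves are then reset
// to the value 2 below to mark the module as already upgraded.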
6285 for (Function &F : M.getFunctionList()) {
6286 if (F.isDeclaration())
6287 continue;
6288
6289 if (SRA) {
6290 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6291 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6292 } else {
6293 if (auto A = F.getFnAttribute("sign-return-address");
6294 A.isValid() && "none" == A.getValueAsString()) {
6295 F.removeFnAttr("sign-return-address");
6296 F.removeFnAttr("sign-return-address-key");
6297 }
6298 }
6299 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6300 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6301 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6302 }
6303
6304 if (BTE)
6305 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6306 if (BPPLR)
6307 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6308 if (GCS)
6309 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6310 if (SRA) {
6311 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6312 if (SRAALLValue == 1)
6313 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6314 if (SRABKeyValue == 1)
6315 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6316 }
6317}
6318
6319static bool isOldLoopArgument(Metadata *MD) {
6320 auto *T = dyn_cast_or_null<MDTuple>(MD);
6321 if (!T)
6322 return false;
6323 if (T->getNumOperands() < 1)
6324 return false;
6325 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6326 if (!S)
6327 return false;
6328 return S->getString().starts_with("llvm.vectorizer.");
6329}
6330
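// For example, the loop metadata operand
//   !{!"llvm.vectorizer.width", i32 4}
// becomes
//   !{!"llvm.loop.vectorize.width", i32 4}
// while "llvm.vectorizer.unroll" maps to "llvm.loop.interleave.count".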
6331static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6332 StringRef OldPrefix = "llvm.vectorizer.";
6333 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6334
6335 if (OldTag == "llvm.vectorizer.unroll")
6336 return MDString::get(C, "llvm.loop.interleave.count");
6337
6338 return MDString::get(
6339 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6340 .str());
6341}
6342
6343static Metadata *upgradeLoopArgument(Metadata *MD) {
6344 auto *T = dyn_cast_or_null<MDTuple>(MD);
6345 if (!T)
6346 return MD;
6347 if (T->getNumOperands() < 1)
6348 return MD;
6349 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6350 if (!OldTag)
6351 return MD;
6352 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6353 return MD;
6354
6355 // This has an old tag. Upgrade it.
6356 SmallVector<Metadata *, 8> Ops;
6357 Ops.reserve(T->getNumOperands());
6358 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6359 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6360 Ops.push_back(T->getOperand(I));
6361
6362 return MDTuple::get(T->getContext(), Ops);
6363}
6364
6365MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6366 auto *T = dyn_cast<MDTuple>(&N);
6367 if (!T)
6368 return &N;
6369
6370 if (none_of(T->operands(), isOldLoopArgument))
6371 return &N;
6372
6373 SmallVector<Metadata *, 8> Ops;
6374 Ops.reserve(T->getNumOperands());
6375 for (Metadata *MD : T->operands())
6376 Ops.push_back(upgradeLoopArgument(MD));
6377
6378 return MDTuple::get(T->getContext(), Ops);
6379}
6380
6381std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
6382 Triple T(TT);
6383 // The only data layout upgrade needed for pre-GCN, SPIR, or SPIRV is to set
6384 // the address space of globals to 1. This does not apply to SPIRV Logical.
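// For example (an illustrative layout), "e-p:32:32" becomes "e-p:32:32-G1",
// and an empty data layout string becomes "G1".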
6385 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6386 !DL.contains("-G") && !DL.starts_with("G")) {
6387 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6388 }
6389
6390 if (T.isLoongArch64() || T.isRISCV64()) {
6391 // Make i32 a native type for 64-bit LoongArch and RISC-V.
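// For example, "e-m:e-i64:64-n64-S128" becomes "e-m:e-i64:64-n32:64-S128".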
6392 auto I = DL.find("-n64-");
6393 if (I != StringRef::npos)
6394 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6395 return DL.str();
6396 }
6397
6398 // AMDGPU data layout upgrades.
6399 std::string Res = DL.str();
6400 if (T.isAMDGPU()) {
6401 // Define address spaces for constants.
6402 if (!DL.contains("-G") && !DL.starts_with("G"))
6403 Res.append(Res.empty() ? "G1" : "-G1");
6404
6405 // AMDGCN data layout upgrades.
6406 if (T.isAMDGCN()) {
6407
6408 // Add missing non-integral declarations.
6409 // This goes before adding new address spaces to prevent incoherent string
6410 // values.
6411 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6412 Res.append("-ni:7:8:9");
6413 // Update ni:7 to ni:7:8:9.
6414 if (DL.ends_with("ni:7"))
6415 Res.append(":8:9");
6416 if (DL.ends_with("ni:7:8"))
6417 Res.append(":9");
6418
6419 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6420 // resources). An empty data layout has already been upgraded to G1 by now.
6421 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6422 Res.append("-p7:160:256:256:32");
6423 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6424 Res.append("-p8:128:128:128:48");
6425 constexpr StringRef OldP8("-p8:128:128-");
6426 if (DL.contains(OldP8))
6427 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6428 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6429 Res.append("-p9:192:256:256:32");
6430 }
6431
6432 // Upgrade the ELF mangling mode.
6433 if (!DL.contains("m:e"))
6434 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6435
6436 return Res;
6437 }
6438
6439 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6440 // If the data layout matches the expected format, add pointer-size address
6441 // spaces to it.
6442 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6443 if (!DL.contains(AddrSpaces)) {
6444 SmallVector<StringRef, 4> Groups;
6445 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6446 if (R.match(Res, &Groups))
6447 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6448 }
6449 };
6450
6451 // AArch64 data layout upgrades.
6452 if (T.isAArch64()) {
6453 // Add "-Fn32"
6454 if (!DL.empty() && !DL.contains("-Fn32"))
6455 Res.append("-Fn32");
6456 AddPtr32Ptr64AddrSpaces();
6457 return Res;
6458 }
6459
6460 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6461 T.isWasm()) {
6462 // Add "-i128:128" where it is missing. (MIPS64 with the o32 ABI did not
6463 // add it.)
6464 std::string I64 = "-i64:64";
6465 std::string I128 = "-i128:128";
6466 if (!StringRef(Res).contains(I128)) {
6467 size_t Pos = Res.find(I64);
6468 if (Pos != std::string::npos)
6469 Res.insert(Pos + I64.size(), I128);
6470 }
6471 return Res;
6472 }
6473
6474 if (!T.isX86())
6475 return Res;
6476
6477 AddPtr32Ptr64AddrSpaces();
6478
6479 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6480 // for i128 operations prior to this being reflected in the data layout, and
6481 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6482 // boundaries, so although this is a breaking change, the upgrade is expected
6483 // to fix more IR than it breaks.
6484 // Intel MCU is an exception and uses 4-byte-alignment.
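// For example (an illustrative layout, ignoring the pointer address-space
// upgrade above),
//   "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-S128"
// becomes
//   "e-m:e-p:32:32-i64:64-i128:128-f80:32-n8:16:32-S128"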
6485 if (!T.isOSIAMCU()) {
6486 std::string I128 = "-i128:128";
6487 if (StringRef Ref = Res; !Ref.contains(I128)) {
6488 SmallVector<StringRef, 4> Groups;
6489 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6490 if (R.match(Res, &Groups))
6491 Res = (Groups[1] + I128 + Groups[3]).str();
6492 }
6493 }
6494
6495 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6496 // Raising the alignment is safe because Clang did not produce f80 values in
6497 // the MSVC environment before this upgrade was added.
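// For example, a layout containing "-f80:32-" is rewritten to contain
// "-f80:128-" instead.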
6498 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6499 StringRef Ref = Res;
6500 auto I = Ref.find("-f80:32-");
6501 if (I != StringRef::npos)
6502 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6503 }
6504
6505 return Res;
6506}
6507
6508void llvm::UpgradeAttributes(AttrBuilder &B) {
6509 StringRef FramePointer;
6510 Attribute A = B.getAttribute("no-frame-pointer-elim");
6511 if (A.isValid()) {
6512 // The value can be "true" or "false".
6513 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6514 B.removeAttribute("no-frame-pointer-elim");
6515 }
6516 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6517 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6518 if (FramePointer != "all")
6519 FramePointer = "non-leaf";
6520 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6521 }
6522 if (!FramePointer.empty())
6523 B.addAttribute("frame-pointer", FramePointer);
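// For example, "no-frame-pointer-elim"="true" becomes "frame-pointer"="all",
// "no-frame-pointer-elim"="false" becomes "frame-pointer"="none", and
// "no-frame-pointer-elim-non-leaf" alone becomes "frame-pointer"="non-leaf".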
6524
6525 A = B.getAttribute("null-pointer-is-valid");
6526 if (A.isValid()) {
6527 // The value can be "true" or "false".
6528 bool NullPointerIsValid = A.getValueAsString() == "true";
6529 B.removeAttribute("null-pointer-is-valid");
6530 if (NullPointerIsValid)
6531 B.addAttribute(Attribute::NullPointerIsValid);
6532 }
6533}
6534
6535void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6536 // clang.arc.attachedcall bundles are now required to have an operand.
6537 // If they don't, it's okay to drop them entirely: when there is an operand,
6538 // the "attachedcall" is meaningful and required, but without an operand,
6539 // it's just a marker NOP. Dropping it merely prevents an optimization.
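// For example (illustrative IR), the operand-less bundle in
//   call void @foo() [ "clang.arc.attachedcall"() ]
// is dropped, while a bundle that names its runtime function, such as
//   [ "clang.arc.attachedcall"(ptr @llvm.objc.retainAutoreleasedReturnValue) ]
// is left untouched.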
6540 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6541 return OBD.getTag() == "clang.arc.attachedcall" &&
6542 OBD.inputs().empty();
6543 });
6544}